Skip to main content

microsandbox_agentd/
session.rs

1//! Exec session management: spawning processes with PTY or pipe I/O.
2
3use std::ffi::{CStr, CString};
4use std::mem::MaybeUninit;
5use std::os::fd::{AsRawFd, FromRawFd, OwnedFd, RawFd};
6use std::process::Stdio;
7use std::{iter, mem, ptr};
8
9use nix::pty;
10use nix::sys::signal::{self, Signal};
11use nix::unistd::Pid;
12use tokio::io::AsyncReadExt;
13use tokio::process::{Child, Command};
14use tokio::sync::mpsc;
15
16use microsandbox_protocol::exec::{ExecFailed, ExecFailureKind, ExecRequest};
17
18use crate::error::{AgentdError, AgentdResult};
19use crate::rlimit;
20
21//--------------------------------------------------------------------------------------------------
22// Functions: classify
23//--------------------------------------------------------------------------------------------------
24
25/// Map an `errno` integer to its standard symbolic name. Returns
26/// `None` for unrecognized values; we only enumerate the ones that
27/// can plausibly come out of fork/exec/setrlimit/setuid paths.
28fn errno_name(e: i32) -> Option<&'static str> {
29    match e {
30        libc::E2BIG => Some("E2BIG"),
31        libc::EACCES => Some("EACCES"),
32        libc::EAGAIN => Some("EAGAIN"),
33        libc::EBUSY => Some("EBUSY"),
34        libc::EFAULT => Some("EFAULT"),
35        libc::EINVAL => Some("EINVAL"),
36        libc::EIO => Some("EIO"),
37        libc::EISDIR => Some("EISDIR"),
38        libc::ELOOP => Some("ELOOP"),
39        libc::EMFILE => Some("EMFILE"),
40        libc::ENAMETOOLONG => Some("ENAMETOOLONG"),
41        libc::ENFILE => Some("ENFILE"),
42        libc::ENOENT => Some("ENOENT"),
43        libc::ENOEXEC => Some("ENOEXEC"),
44        libc::ENOMEM => Some("ENOMEM"),
45        libc::ENOSYS => Some("ENOSYS"),
46        libc::ENOTDIR => Some("ENOTDIR"),
47        libc::ENXIO => Some("ENXIO"),
48        libc::EPERM => Some("EPERM"),
49        libc::ETXTBSY => Some("ETXTBSY"),
50        _ => None,
51    }
52}
53
54/// Classify a fork/exec-time `errno` into one of the
55/// `ExecFailureKind` buckets.
56///
57/// ENOENT is ambiguous in principle (missing binary vs. missing
58/// cwd), but in practice it's overwhelmingly the binary — the cwd
59/// is set in `pre_exec` *before* execvp, and a bad cwd would more
60/// commonly produce ENOTDIR (path component isn't a directory) or
61/// EACCES (no permission to chdir). We classify ENOENT as
62/// `NotFound` and ENOTDIR as `BadCwd`. Edge cases of "bad cwd that
63/// happens to ENOENT" fall through with the message "spawn 'cmd':
64/// No such file or directory" which is still understandable.
65fn classify_spawn_errno(errno: i32) -> ExecFailureKind {
66    match errno {
67        libc::ENOENT => ExecFailureKind::NotFound,
68        libc::ENOTDIR => ExecFailureKind::BadCwd,
69        libc::EACCES | libc::EPERM => ExecFailureKind::PermissionDenied,
70        libc::ENOEXEC => ExecFailureKind::NotExecutable,
71        libc::EISDIR => ExecFailureKind::NotExecutable,
72        libc::ETXTBSY => ExecFailureKind::NotExecutable,
73        libc::E2BIG | libc::ELOOP | libc::ENAMETOOLONG | libc::EFAULT => ExecFailureKind::BadArgs,
74        libc::EMFILE | libc::ENFILE => ExecFailureKind::ResourceLimit,
75        libc::EAGAIN => ExecFailureKind::ResourceLimit,
76        libc::ENOMEM => ExecFailureKind::OutOfMemory,
77        libc::EINVAL => ExecFailureKind::Other,
78        _ => ExecFailureKind::Other,
79    }
80}
81
82/// Build a `ExecFailed` payload from a spawn-time `io::Error`.
83fn exec_failed_from_io_error(err: &std::io::Error, cmd: &str, stage: &str) -> ExecFailed {
84    let errno = err.raw_os_error();
85    let kind = errno
86        .map(classify_spawn_errno)
87        .unwrap_or(ExecFailureKind::Other);
88    let errno_name = errno.and_then(errno_name).map(str::to_string);
89    let message = format!("spawn {cmd:?}: {err}");
90    ExecFailed {
91        kind,
92        errno,
93        errno_name,
94        message,
95        stage: Some(stage.to_string()),
96    }
97}
98
99//--------------------------------------------------------------------------------------------------
100// Types
101//--------------------------------------------------------------------------------------------------
102
103/// An active exec session handle for sending input to a running process.
104///
105/// Output reading is handled by a background task that sends events
106/// via the `mpsc` channel provided at spawn time.
107#[derive(Debug)]
108pub struct ExecSession {
109    /// The PID of the spawned process.
110    pid: i32,
111
112    /// The PTY master fd (only for PTY mode, used for writing and resize).
113    pty_master: Option<OwnedFd>,
114
115    /// The child's stdin (only for pipe mode).
116    stdin: Option<tokio::process::ChildStdin>,
117}
118
/// An event produced by a session for the agent loop to forward to the host.
pub enum SessionOutput {
    /// Bytes read from stdout, or from the PTY master in PTY mode.
    Stdout(Vec<u8>),

    /// Bytes read from stderr (pipe mode only).
    Stderr(Vec<u8>),

    /// The process exited with this code.
    Exited(i32),

    /// Already-encoded frame bytes, written as-is to the serial output buffer.
    ///
    /// Filesystem streaming operations use this because they encode their
    /// own `FsData`/`FsResponse` messages.
    Raw(Vec<u8>),
}
136
137struct ResolvedUser {
138    uid: libc::uid_t,
139    gid: libc::gid_t,
140    initgroups_user: Option<CString>,
141    home_dir: Option<CString>,
142}
143
144struct PasswdEntry {
145    name: String,
146    uid: libc::uid_t,
147    gid: libc::gid_t,
148    home_dir: Option<String>,
149}
150
151struct GroupEntry {
152    gid: libc::gid_t,
153}
154
/// Pipe the forked child uses to report a failed exec back to the parent.
struct ExecErrorPipe {
    /// Parent side: yields the child's errno, or EOF once exec succeeded.
    read_end: OwnedFd,
    /// Child side: receives the errno when exec fails.
    write_end: OwnedFd,
}
159
160//--------------------------------------------------------------------------------------------------
161// Methods
162//--------------------------------------------------------------------------------------------------
163
164impl ExecSession {
165    /// Spawns a new exec session.
166    ///
167    /// If `req.tty` is true, uses a PTY. Otherwise, uses piped stdin/stdout/stderr.
168    /// A background task is spawned to read output and send events via `tx`.
169    pub fn spawn(
170        id: u32,
171        req: &ExecRequest,
172        tx: mpsc::UnboundedSender<(u32, SessionOutput)>,
173        default_user: Option<&str>,
174    ) -> AgentdResult<Self> {
175        if req.tty {
176            Self::spawn_pty(id, req, tx, default_user)
177        } else {
178            Self::spawn_pipe(id, req, tx, default_user)
179        }
180    }
181
182    /// Returns the PID of the spawned process (as u32 for the protocol).
183    pub fn pid(&self) -> u32 {
184        self.pid as u32
185    }
186
187    /// Writes data to the process's stdin (or PTY master).
188    pub async fn write_stdin(&self, data: &[u8]) -> AgentdResult<()> {
189        if let Some(ref master) = self.pty_master {
190            blocking_write_fd(master.as_raw_fd(), data).await
191        } else if let Some(ref stdin) = self.stdin {
192            blocking_write_fd(stdin.as_raw_fd(), data).await
193        } else {
194            Ok(())
195        }
196    }
197
198    /// Resizes the PTY (only applicable for TTY sessions).
199    pub fn resize(&self, rows: u16, cols: u16) -> AgentdResult<()> {
200        if let Some(ref master) = self.pty_master {
201            let ws = libc::winsize {
202                ws_row: rows,
203                ws_col: cols,
204                ws_xpixel: 0,
205                ws_ypixel: 0,
206            };
207            let ret = unsafe { libc::ioctl(master.as_raw_fd(), libc::TIOCSWINSZ, &ws) };
208            if ret < 0 {
209                return Err(std::io::Error::last_os_error().into());
210            }
211        }
212        Ok(())
213    }
214
215    /// Sends a signal to the spawned process.
216    pub fn send_signal(&self, signum: i32) -> AgentdResult<()> {
217        let sig = Signal::try_from(signum)
218            .map_err(|e| AgentdError::ExecSession(format!("invalid signal {signum}: {e}")))?;
219        signal::kill(Pid::from_raw(self.pid), sig)?;
220        Ok(())
221    }
222
223    /// Closes the process's stdin.
224    ///
225    /// For pipe mode, drops the `ChildStdin` handle which closes the fd.
226    /// For PTY mode, this is a no-op (the PTY master stays open for output).
227    pub fn close_stdin(&mut self) {
228        self.stdin.take();
229    }
230}
231
232impl ExecSession {
233    /// Spawns a process with a PTY.
234    fn spawn_pty(
235        id: u32,
236        req: &ExecRequest,
237        tx: mpsc::UnboundedSender<(u32, SessionOutput)>,
238        default_user: Option<&str>,
239    ) -> AgentdResult<Self> {
240        let pty = pty::openpty(None, None)?;
241        let err_pipe = new_exec_error_pipe()?;
242
243        // Set initial window size.
244        let ws = libc::winsize {
245            ws_row: req.rows,
246            ws_col: req.cols,
247            ws_xpixel: 0,
248            ws_ypixel: 0,
249        };
250        let ret = unsafe { libc::ioctl(pty.master.as_raw_fd(), libc::TIOCSWINSZ, &ws) };
251        if ret < 0 {
252            return Err(std::io::Error::last_os_error().into());
253        }
254
255        let slave_fd = pty.slave.as_raw_fd();
256
257        // Pre-build all strings before fork to avoid allocating in the child.
258        let c_cmd = CString::new(req.cmd.as_str())
259            .map_err(|e| AgentdError::ExecSession(format!("invalid command: {e}")))?;
260        let mut c_args: Vec<CString> = vec![c_cmd.clone()];
261        for arg in &req.args {
262            c_args.push(
263                CString::new(arg.as_str())
264                    .map_err(|e| AgentdError::ExecSession(format!("invalid arg: {e}")))?,
265            );
266        }
267
268        // Build argv pointer array (null-terminated).
269        let argv_ptrs: Vec<*const libc::c_char> = c_args
270            .iter()
271            .map(|s| s.as_ptr())
272            .chain(iter::once(ptr::null()))
273            .collect();
274
275        // Pre-parse environment variables into CStrings.
276        let c_env: Vec<(CString, CString)> = req
277            .env
278            .iter()
279            .filter_map(|var| {
280                let (key, val) = var.split_once('=')?;
281                let k = CString::new(key).ok()?;
282                let v = CString::new(val).ok()?;
283                Some((k, v))
284            })
285            .collect();
286
287        // Pre-build cwd CString.
288        let c_cwd = req
289            .cwd
290            .as_ref()
291            .map(|dir| CString::new(dir.as_str()))
292            .transpose()
293            .map_err(|e| AgentdError::ExecSession(format!("invalid cwd: {e}")))?;
294
295        let resolved_user = resolve_requested_user(req, default_user)?;
296        let default_home = default_home_dir(req, resolved_user.as_ref()).map(CStr::to_owned);
297        let home_key = default_home
298            .as_ref()
299            .map(|_| {
300                CString::new("HOME")
301                    .map_err(|e| AgentdError::ExecSession(format!("invalid home env key: {e}")))
302            })
303            .transpose()?;
304
305        // Pre-parse rlimits before fork (no allocations in child).
306        let parsed_rlimits = rlimit::to_libc(&req.rlimits);
307
308        // Fork.
309        let pid = unsafe { libc::fork() };
310        if pid < 0 {
311            let io_err = std::io::Error::last_os_error();
312            return Err(AgentdError::ExecSpawnFailed(exec_failed_from_io_error(
313                &io_err, &req.cmd, "fork",
314            )));
315        }
316
317        #[allow(unreachable_code)]
318        if pid == 0 {
319            // Child process — only async-signal-safe operations from here.
320            drop(pty.master);
321            drop(err_pipe.read_end);
322
323            // Create new session.
324            if unsafe { libc::setsid() } < 0 {
325                unsafe { libc::_exit(1) };
326            }
327
328            // Set controlling terminal.
329            if unsafe { libc::ioctl(slave_fd, libc::TIOCSCTTY, 0) } < 0 {
330                unsafe { libc::_exit(1) };
331            }
332
333            // Dup slave to stdin/stdout/stderr.
334            unsafe {
335                if libc::dup2(slave_fd, 0) < 0 {
336                    libc::_exit(1);
337                }
338                if libc::dup2(slave_fd, 1) < 0 {
339                    libc::_exit(1);
340                }
341                if libc::dup2(slave_fd, 2) < 0 {
342                    libc::_exit(1);
343                }
344                if slave_fd > 2 {
345                    libc::close(slave_fd);
346                }
347            }
348
349            // Set environment variables using pre-built CStrings.
350            for (key, val) in &c_env {
351                unsafe {
352                    libc::setenv(key.as_ptr(), val.as_ptr(), 1);
353                }
354            }
355
356            // Set working directory.
357            if let Some(ref dir) = c_cwd {
358                unsafe {
359                    libc::chdir(dir.as_ptr());
360                }
361            }
362
363            if let Some(ref user) = resolved_user
364                && apply_resolved_user(user).is_err()
365            {
366                unsafe { libc::_exit(1) };
367            }
368
369            if let (Some(key), Some(home)) = (&home_key, &default_home) {
370                unsafe {
371                    libc::setenv(key.as_ptr(), home.as_ptr(), 1);
372                }
373            }
374
375            // Apply resource limits.
376            for (resource, limit) in &parsed_rlimits {
377                if unsafe { libc::setrlimit(*resource as _, limit) } != 0 {
378                    unsafe { libc::_exit(1) };
379                }
380            }
381
382            // execvp — on success this never returns.
383            unsafe {
384                libc::execvp(argv_ptrs[0], argv_ptrs.as_ptr());
385            }
386
387            // If execvp returns, it failed.
388            write_exec_error_and_exit(err_pipe.write_end.as_raw_fd());
389        }
390
391        // Parent process.
392        drop(pty.slave);
393        drop(err_pipe.write_end);
394
395        if let Some(exec_errno) = read_exec_error(err_pipe.read_end.as_raw_fd())? {
396            let _ = wait_for_exec_failure_child(pid);
397            let io_err = std::io::Error::from_raw_os_error(exec_errno);
398            return Err(AgentdError::ExecSpawnFailed(exec_failed_from_io_error(
399                &io_err, &req.cmd, "execvp",
400            )));
401        }
402
403        // Dup the master fd for the reader task.
404        let reader_fd = unsafe { libc::dup(pty.master.as_raw_fd()) };
405        if reader_fd < 0 {
406            return Err(std::io::Error::last_os_error().into());
407        }
408        let reader_fd = unsafe { OwnedFd::from_raw_fd(reader_fd) };
409
410        // Spawn background reader task.
411        tokio::spawn(pty_reader_task(id, pid, reader_fd, tx));
412
413        Ok(Self {
414            pid,
415            pty_master: Some(pty.master),
416            stdin: None,
417        })
418    }
419
420    /// Spawns a process with piped stdio.
421    fn spawn_pipe(
422        id: u32,
423        req: &ExecRequest,
424        tx: mpsc::UnboundedSender<(u32, SessionOutput)>,
425        default_user: Option<&str>,
426    ) -> AgentdResult<Self> {
427        let mut cmd = Command::new(&req.cmd);
428        cmd.args(&req.args)
429            .stdin(Stdio::piped())
430            .stdout(Stdio::piped())
431            .stderr(Stdio::piped());
432
433        for var in &req.env {
434            if let Some((key, val)) = var.split_once('=') {
435                cmd.env(key, val);
436            }
437        }
438
439        if let Some(ref dir) = req.cwd {
440            cmd.current_dir(dir);
441        }
442
443        let resolved_user = resolve_requested_user(req, default_user)?;
444        if let Some(home) = default_home_dir(req, resolved_user.as_ref()) {
445            cmd.env("HOME", home.to_string_lossy().into_owned());
446        }
447
448        // Apply resource limits in the child before exec.
449        let parsed_rlimits = rlimit::to_libc(&req.rlimits);
450        if resolved_user.is_some() || !parsed_rlimits.is_empty() {
451            unsafe {
452                cmd.pre_exec(move || {
453                    if let Some(ref user) = resolved_user {
454                        apply_resolved_user(user).map_err(agentd_to_io_error)?;
455                    }
456                    for (resource, limit) in &parsed_rlimits {
457                        if libc::setrlimit(*resource as _, limit) != 0 {
458                            return Err(std::io::Error::last_os_error());
459                        }
460                    }
461                    Ok(())
462                });
463            }
464        }
465
466        let cmd_label = req.cmd.clone();
467        let mut child = cmd.spawn().map_err(|err| {
468            AgentdError::ExecSpawnFailed(exec_failed_from_io_error(
469                &err,
470                &cmd_label,
471                "Command::spawn",
472            ))
473        })?;
474        let pid = child.id().unwrap_or(0) as i32;
475        let stdin = child.stdin.take();
476        let stdout = child.stdout.take();
477        let stderr = child.stderr.take();
478
479        // Spawn background reader task.
480        tokio::spawn(pipe_reader_task(id, child, stdout, stderr, tx));
481
482        Ok(Self {
483            pid,
484            pty_master: None,
485            stdin,
486        })
487    }
488}
489
490//--------------------------------------------------------------------------------------------------
491// Functions
492//--------------------------------------------------------------------------------------------------
493
494fn new_exec_error_pipe() -> AgentdResult<ExecErrorPipe> {
495    let mut fds = [0; 2];
496    let ret = unsafe { libc::pipe2(fds.as_mut_ptr(), libc::O_CLOEXEC) };
497    if ret != 0 {
498        return Err(std::io::Error::last_os_error().into());
499    }
500
501    Ok(ExecErrorPipe {
502        read_end: unsafe { OwnedFd::from_raw_fd(fds[0]) },
503        write_end: unsafe { OwnedFd::from_raw_fd(fds[1]) },
504    })
505}
506
507fn write_exec_error_and_exit(err_fd: RawFd) -> ! {
508    let errno = unsafe { *libc::__errno_location() };
509    let bytes = errno.to_ne_bytes();
510    let _ = unsafe { libc::write(err_fd, bytes.as_ptr() as *const libc::c_void, bytes.len()) };
511    unsafe { libc::_exit(127) }
512}
513
514fn read_exec_error(err_fd: RawFd) -> AgentdResult<Option<i32>> {
515    let mut buf = [0u8; mem::size_of::<i32>()];
516    let n = unsafe { libc::read(err_fd, buf.as_mut_ptr() as *mut libc::c_void, buf.len()) };
517    if n < 0 {
518        return Err(std::io::Error::last_os_error().into());
519    }
520    if n == 0 {
521        return Ok(None);
522    }
523    if n as usize != buf.len() {
524        return Err(AgentdError::ExecSession(format!(
525            "short exec error report: expected {} bytes, got {n}",
526            buf.len()
527        )));
528    }
529    Ok(Some(i32::from_ne_bytes(buf)))
530}
531
532fn wait_for_exec_failure_child(pid: i32) -> AgentdResult<()> {
533    let ret = unsafe { libc::waitpid(pid, ptr::null_mut(), 0) };
534    if ret < 0 {
535        return Err(std::io::Error::last_os_error().into());
536    }
537    Ok(())
538}
539
540fn resolve_requested_user(
541    req: &ExecRequest,
542    default_user: Option<&str>,
543) -> AgentdResult<Option<ResolvedUser>> {
544    let requested = req
545        .user
546        .as_deref()
547        .map(str::trim)
548        .filter(|value| !value.is_empty())
549        .or(default_user);
550
551    requested.map(resolve_user_spec).transpose()
552}
553
554fn resolve_user_spec(spec: &str) -> AgentdResult<ResolvedUser> {
555    let (user_part, group_part) = match spec.split_once(':') {
556        Some((user, group)) => (user.trim(), Some(group.trim())),
557        None => (spec.trim(), None),
558    };
559
560    if user_part.is_empty() {
561        return Err(AgentdError::ExecSession("user spec has empty user".into()));
562    }
563
564    let passwd = if let Ok(uid) = parse_id(user_part) {
565        lookup_passwd_by_uid(uid)?
566    } else {
567        lookup_passwd_by_name(user_part)?
568            .ok_or_else(|| AgentdError::ExecSession(format!("guest user not found: {user_part}")))?
569            .into()
570    };
571
572    let (uid, passwd_entry) = match passwd {
573        ResolvedUserLookup::Known(entry) => (entry.uid, Some(entry)),
574        ResolvedUserLookup::Numeric(uid) => (uid, None),
575    };
576
577    let gid = match group_part {
578        Some("") => {
579            return Err(AgentdError::ExecSession("user spec has empty group".into()));
580        }
581        Some(group) => resolve_group_spec(group)?,
582        None => passwd_entry
583            .as_ref()
584            .map(|entry| entry.gid)
585            .unwrap_or_else(|| unsafe { libc::getgid() }),
586    };
587
588    let initgroups_user = passwd_entry
589        .as_ref()
590        .map(|entry| CString::new(entry.name.as_str()))
591        .transpose()
592        .map_err(|e| AgentdError::ExecSession(format!("invalid guest user name: {e}")))?;
593
594    Ok(ResolvedUser {
595        uid,
596        gid,
597        initgroups_user,
598        home_dir: passwd_entry
599            .as_ref()
600            .and_then(|entry| entry.home_dir.as_deref())
601            .map(CString::new)
602            .transpose()
603            .map_err(|e| AgentdError::ExecSession(format!("invalid guest home directory: {e}")))?,
604    })
605}
606
607enum ResolvedUserLookup {
608    Known(PasswdEntry),
609    Numeric(libc::uid_t),
610}
611
612impl From<PasswdEntry> for ResolvedUserLookup {
613    fn from(value: PasswdEntry) -> Self {
614        Self::Known(value)
615    }
616}
617
618fn resolve_group_spec(spec: &str) -> AgentdResult<libc::gid_t> {
619    if let Ok(gid) = parse_id(spec) {
620        return Ok(gid);
621    }
622
623    lookup_group_by_name(spec)?
624        .map(|entry| entry.gid)
625        .ok_or_else(|| AgentdError::ExecSession(format!("guest group not found: {spec}")))
626}
627
/// Parses a decimal uid/gid; any parse failure means "not numeric".
fn parse_id(value: &str) -> Result<u32, std::num::ParseIntError> {
    str::parse::<u32>(value)
}
631
632fn lookup_passwd_by_name(name: &str) -> AgentdResult<Option<PasswdEntry>> {
633    let name = CString::new(name)
634        .map_err(|e| AgentdError::ExecSession(format!("invalid guest user name: {e}")))?;
635    let mut pwd = MaybeUninit::<libc::passwd>::uninit();
636    let mut result = ptr::null_mut();
637    let mut buf = vec![0u8; lookup_buffer_len()];
638    let rc = unsafe {
639        libc::getpwnam_r(
640            name.as_ptr(),
641            pwd.as_mut_ptr(),
642            buf.as_mut_ptr().cast(),
643            buf.len(),
644            &mut result,
645        )
646    };
647    if rc != 0 {
648        return Err(AgentdError::ExecSession(format!(
649            "failed to resolve guest user {name:?}: {}",
650            std::io::Error::from_raw_os_error(rc)
651        )));
652    }
653    if result.is_null() {
654        return Ok(None);
655    }
656
657    let pwd = unsafe { pwd.assume_init() };
658    let name = unsafe { CStr::from_ptr(pwd.pw_name) }
659        .to_string_lossy()
660        .into_owned();
661    let home_dir = unsafe { CStr::from_ptr(pwd.pw_dir) }
662        .to_string_lossy()
663        .into_owned();
664    Ok(Some(PasswdEntry {
665        name,
666        uid: pwd.pw_uid,
667        gid: pwd.pw_gid,
668        home_dir: (!home_dir.is_empty()).then_some(home_dir),
669    }))
670}
671
672fn lookup_passwd_by_uid(uid: libc::uid_t) -> AgentdResult<ResolvedUserLookup> {
673    let mut pwd = MaybeUninit::<libc::passwd>::uninit();
674    let mut result = ptr::null_mut();
675    let mut buf = vec![0u8; lookup_buffer_len()];
676    let rc = unsafe {
677        libc::getpwuid_r(
678            uid,
679            pwd.as_mut_ptr(),
680            buf.as_mut_ptr().cast(),
681            buf.len(),
682            &mut result,
683        )
684    };
685    if rc != 0 {
686        return Err(AgentdError::ExecSession(format!(
687            "failed to resolve guest uid {uid}: {}",
688            std::io::Error::from_raw_os_error(rc)
689        )));
690    }
691    if result.is_null() {
692        return Ok(ResolvedUserLookup::Numeric(uid));
693    }
694
695    let pwd = unsafe { pwd.assume_init() };
696    let name = unsafe { CStr::from_ptr(pwd.pw_name) }
697        .to_string_lossy()
698        .into_owned();
699    let home_dir = unsafe { CStr::from_ptr(pwd.pw_dir) }
700        .to_string_lossy()
701        .into_owned();
702    Ok(ResolvedUserLookup::Known(PasswdEntry {
703        name,
704        uid: pwd.pw_uid,
705        gid: pwd.pw_gid,
706        home_dir: (!home_dir.is_empty()).then_some(home_dir),
707    }))
708}
709
710fn lookup_group_by_name(name: &str) -> AgentdResult<Option<GroupEntry>> {
711    let name = CString::new(name)
712        .map_err(|e| AgentdError::ExecSession(format!("invalid guest group name: {e}")))?;
713    let mut grp = MaybeUninit::<libc::group>::uninit();
714    let mut result = ptr::null_mut();
715    let mut buf = vec![0u8; lookup_buffer_len()];
716    let rc = unsafe {
717        libc::getgrnam_r(
718            name.as_ptr(),
719            grp.as_mut_ptr(),
720            buf.as_mut_ptr().cast(),
721            buf.len(),
722            &mut result,
723        )
724    };
725    if rc != 0 {
726        return Err(AgentdError::ExecSession(format!(
727            "failed to resolve guest group {name:?}: {}",
728            std::io::Error::from_raw_os_error(rc)
729        )));
730    }
731    if result.is_null() {
732        return Ok(None);
733    }
734
735    let grp = unsafe { grp.assume_init() };
736    Ok(Some(GroupEntry { gid: grp.gr_gid }))
737}
738
739fn lookup_buffer_len() -> usize {
740    let size = unsafe { libc::sysconf(libc::_SC_GETPW_R_SIZE_MAX) };
741    if size > 0 { size as usize } else { 16 * 1024 }
742}
743
744fn apply_resolved_user(user: &ResolvedUser) -> AgentdResult<()> {
745    if let Some(ref name) = user.initgroups_user {
746        if unsafe { libc::initgroups(name.as_ptr(), user.gid) } != 0 {
747            return Err(std::io::Error::last_os_error().into());
748        }
749    } else if unsafe { libc::setgroups(0, ptr::null()) } != 0 {
750        return Err(std::io::Error::last_os_error().into());
751    }
752
753    if unsafe { libc::setgid(user.gid) } != 0 {
754        return Err(std::io::Error::last_os_error().into());
755    }
756    if unsafe { libc::setuid(user.uid) } != 0 {
757        return Err(std::io::Error::last_os_error().into());
758    }
759
760    Ok(())
761}
762
763fn default_home_dir<'a>(req: &ExecRequest, user: Option<&'a ResolvedUser>) -> Option<&'a CStr> {
764    if env_contains_key(&req.env, "HOME") {
765        return None;
766    }
767
768    user.and_then(|user| user.home_dir.as_deref())
769}
770
/// Reports whether any `KEY=VALUE` entry in `env` has exactly `key` as its key.
///
/// Entries without an `=` are ignored; the comparison is case-sensitive.
fn env_contains_key(env: &[String], key: &str) -> bool {
    env.iter()
        .filter_map(|entry| entry.split_once('='))
        .any(|(entry_key, _)| entry_key == key)
}
779
780fn agentd_to_io_error(err: AgentdError) -> std::io::Error {
781    std::io::Error::other(err.to_string())
782}
783
784/// Writes data to a raw fd using a blocking task, handling short writes.
785async fn blocking_write_fd(fd: RawFd, data: &[u8]) -> AgentdResult<()> {
786    let data = data.to_vec();
787    tokio::task::spawn_blocking(move || {
788        let mut written = 0;
789        while written < data.len() {
790            let ptr = unsafe { data.as_ptr().add(written) as *const libc::c_void };
791            let ret = unsafe { libc::write(fd, ptr, data.len() - written) };
792            if ret < 0 {
793                let err = std::io::Error::last_os_error();
794                let code = err.raw_os_error();
795                if code == Some(libc::EAGAIN) || code == Some(libc::EWOULDBLOCK) {
796                    wait_fd_writable(fd)?;
797                    continue;
798                }
799                if code == Some(libc::EINTR) {
800                    continue;
801                }
802                return Err(AgentdError::Io(err));
803            }
804            if ret == 0 {
805                wait_fd_writable(fd)?;
806                continue;
807            }
808            written += ret as usize;
809        }
810        Ok(())
811    })
812    .await
813    .map_err(|e| AgentdError::ExecSession(format!("stdin write join error: {e}")))?
814}
815
816fn wait_fd_writable(fd: RawFd) -> AgentdResult<()> {
817    let mut pollfd = libc::pollfd {
818        fd,
819        events: libc::POLLOUT,
820        revents: 0,
821    };
822
823    loop {
824        let ret = unsafe { libc::poll(&mut pollfd, 1, -1) };
825        if ret < 0 {
826            let err = std::io::Error::last_os_error();
827            if err.raw_os_error() == Some(libc::EINTR) {
828                continue;
829            }
830            return Err(AgentdError::Io(err));
831        }
832        if ret == 0 {
833            continue;
834        }
835        // Any positive return means the fd is actionable: POLLOUT lets the
836        // next write make progress, and POLLHUP/POLLERR/POLLNVAL will cause
837        // the next write to fail with a real errno (typically EPIPE) which
838        // is more meaningful than poll's revents.
839        return Ok(());
840    }
841}
842
843/// Background task that reads from a PTY master fd and sends output events.
844async fn pty_reader_task(
845    id: u32,
846    pid: i32,
847    master_fd: OwnedFd,
848    tx: mpsc::UnboundedSender<(u32, SessionOutput)>,
849) {
850    let tx_output = tx.clone();
851    let read_result = tokio::task::spawn_blocking(move || {
852        // PTY masters are safer with a dedicated blocking read loop than with
853        // edge-driven readiness. Fast writers followed by process exit can
854        // strand the tail behind a missed wakeup/HUP transition.
855        let raw = master_fd.as_raw_fd();
856        let flags = unsafe { libc::fcntl(raw, libc::F_GETFL) };
857        if flags >= 0 {
858            unsafe { libc::fcntl(raw, libc::F_SETFL, flags & !libc::O_NONBLOCK) };
859        }
860
861        loop {
862            let mut buf = [0u8; 4096];
863            let n = unsafe { libc::read(raw, buf.as_mut_ptr() as *mut libc::c_void, buf.len()) };
864
865            if n > 0 {
866                if tx_output
867                    .send((id, SessionOutput::Stdout(buf[..n as usize].to_vec())))
868                    .is_err()
869                {
870                    break;
871                }
872                continue;
873            }
874
875            if n == 0 {
876                break;
877            }
878
879            let err = std::io::Error::last_os_error();
880            match err.raw_os_error() {
881                Some(libc::EINTR) => continue,
882                Some(libc::EIO) => break,
883                _ => break,
884            }
885        }
886    })
887    .await;
888
889    let _ = read_result;
890
891    let code = wait_for_pid(pid).await;
892    let _ = tx.send((id, SessionOutput::Exited(code)));
893}
894
895/// Background task that reads from piped stdout/stderr and sends output events.
896async fn pipe_reader_task(
897    id: u32,
898    mut child: Child,
899    stdout: Option<tokio::process::ChildStdout>,
900    stderr: Option<tokio::process::ChildStderr>,
901    tx: mpsc::UnboundedSender<(u32, SessionOutput)>,
902) {
903    let mut stdout = stdout;
904    let mut stderr = stderr;
905    let mut stdout_eof = stdout.is_none();
906    let mut stderr_eof = stderr.is_none();
907
908    while !stdout_eof || !stderr_eof {
909        let mut stdout_buf = [0u8; 4096];
910        let mut stderr_buf = [0u8; 4096];
911
912        tokio::select! {
913            result = async {
914                match stdout.as_mut() {
915                    Some(out) => out.read(&mut stdout_buf).await,
916                    None => std::future::pending().await,
917                }
918            }, if !stdout_eof => {
919                match result {
920                    Ok(0) | Err(_) => {
921                        stdout = None;
922                        stdout_eof = true;
923                    }
924                    Ok(n) => {
925                        let _ = tx.send((id, SessionOutput::Stdout(stdout_buf[..n].to_vec())));
926                    }
927                }
928            }
929            result = async {
930                match stderr.as_mut() {
931                    Some(err) => err.read(&mut stderr_buf).await,
932                    None => std::future::pending().await,
933                }
934            }, if !stderr_eof => {
935                match result {
936                    Ok(0) | Err(_) => {
937                        stderr = None;
938                        stderr_eof = true;
939                    }
940                    Ok(n) => {
941                        let _ = tx.send((id, SessionOutput::Stderr(stderr_buf[..n].to_vec())));
942                    }
943                }
944            }
945        }
946    }
947
948    // Both streams are done — wait for process exit.
949    let code = match child.wait().await {
950        Ok(status) => status.code().unwrap_or(-1),
951        Err(_) => -1,
952    };
953
954    let _ = tx.send((id, SessionOutput::Exited(code)));
955}
956
957/// Waits for a process to exit by PID and returns the exit code.
958async fn wait_for_pid(pid: i32) -> i32 {
959    tokio::task::spawn_blocking(move || {
960        let mut status: i32 = 0;
961        unsafe {
962            libc::waitpid(pid, &mut status, 0);
963        }
964        if libc::WIFEXITED(status) {
965            libc::WEXITSTATUS(status)
966        } else {
967            -1
968        }
969    })
970    .await
971    .unwrap_or(-1)
972}
973
974//--------------------------------------------------------------------------------------------------
975// Tests
976//--------------------------------------------------------------------------------------------------
977
#[cfg(test)]
mod tests {
    use std::time::Duration;

    use tokio::time;

    use microsandbox_protocol::exec::ExecRequest;

    use super::*;

    /// Builds an `ExecRequest` with the values every test in this module shares:
    /// no working directory, a 24x80 terminal size and no rlimits.
    fn exec_request(
        cmd: &str,
        args: &[&str],
        env: &[&str],
        user: Option<&str>,
        tty: bool,
    ) -> ExecRequest {
        ExecRequest {
            cmd: cmd.to_string(),
            args: args.iter().copied().map(String::from).collect(),
            env: env.iter().copied().map(String::from).collect(),
            cwd: None,
            user: user.map(String::from),
            tty,
            rows: 24,
            cols: 80,
            rlimits: Vec::new(),
        }
    }

    /// A resolved uid/gid 1000 user whose home directory is `/home/tester`.
    fn tester_user() -> ResolvedUser {
        ResolvedUser {
            uid: 1000,
            gid: 1000,
            initgroups_user: None,
            home_dir: Some(CString::new("/home/tester").unwrap()),
        }
    }

    /// Real uid and gid of the test process.
    fn current_ids() -> (libc::uid_t, libc::gid_t) {
        // SAFETY: both calls take no arguments and only return the caller's ids.
        unsafe { (libc::getuid(), libc::getgid()) }
    }

    /// Offset of the first occurrence of `needle` in `haystack`, if any.
    /// `needle` must be non-empty.
    fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
        haystack
            .windows(needle.len())
            .position(|window| window == needle)
    }

    /// A PTY session must deliver all output, in order, followed by the exit event.
    #[tokio::test]
    async fn test_pty_reader_drains_ready_fd() {
        let (tx, mut rx) = mpsc::unbounded_channel();
        let script = "i=0; while [ $i -lt 256 ]; do printf AAAA; i=$((i+1)); done; printf SECOND; sleep 0.1; printf '<END>\\n'; sleep 0.1; exit 0";
        let req = exec_request(
            "/bin/sh",
            &["-c", script],
            &["PATH=/usr/local/bin:/usr/bin:/bin"],
            None,
            true,
        );

        let session = ExecSession::spawn(7, &req, tx, None).expect("spawn pty session");
        let mut stdout = Vec::new();

        // Collect stdout until the exit event arrives, the channel closes,
        // or the deadline passes.
        let outcome = time::timeout(Duration::from_secs(15), async {
            while let Some((id, output)) = rx.recv().await {
                assert_eq!(id, 7);
                match output {
                    SessionOutput::Stdout(bytes) => stdout.extend_from_slice(&bytes),
                    SessionOutput::Exited(code) => return Some(code),
                    SessionOutput::Stderr(_) | SessionOutput::Raw(_) => {}
                }
            }
            None
        })
        .await;

        let exit = match outcome {
            Ok(exit) => exit,
            Err(_) => {
                let _ = session.send_signal(libc::SIGKILL);
                panic!("timed out waiting for PTY output");
            }
        };

        assert_eq!(exit, Some(0));

        let in_order = match (find(&stdout, b"SECOND"), find(&stdout, b"<END>")) {
            (Some(second), Some(end)) => second < end,
            _ => false,
        };

        assert!(
            in_order,
            "expected immediate PTY write to arrive before later output; got {:?}",
            String::from_utf8_lossy(&stdout),
        );
    }

    /// A numeric `uid:gid` spec resolves to exactly those ids.
    #[test]
    fn test_resolve_user_spec_for_current_uid_gid() {
        let (uid, gid) = current_ids();
        let spec = format!("{uid}:{gid}");

        let resolved = resolve_user_spec(&spec).expect("resolve numeric user");

        assert_eq!(resolved.uid, uid);
        assert_eq!(resolved.gid, gid);
    }

    /// The user named in the request wins over the configured default.
    #[test]
    fn test_request_user_overrides_config_default() {
        let req = exec_request("/bin/true", &[], &[], Some("1:1"), false);

        let resolved = resolve_requested_user(&req, Some("0:0")).expect("resolve requested user");

        assert_eq!(resolved.unwrap().uid, 1);
    }

    /// With no user in the request, the configured default is resolved instead.
    #[test]
    fn test_config_default_user_used_when_request_has_none() {
        let req = exec_request("/bin/true", &[], &[], None, false);
        let (uid, gid) = current_ids();
        let default_spec = format!("{uid}:{gid}");

        let resolved = resolve_requested_user(&req, Some(default_spec.as_str()))
            .expect("resolve with config default")
            .expect("should resolve to a user");

        assert_eq!(resolved.uid, uid);
        assert_eq!(resolved.gid, gid);
    }

    /// Without a `HOME` entry in the request env, the resolved user's home is used.
    #[test]
    fn test_default_home_dir_uses_resolved_user_home() {
        let req = exec_request("/bin/true", &[], &[], None, false);
        let user = tester_user();

        let home = default_home_dir(&req, Some(&user)).map(CStr::to_string_lossy);

        assert_eq!(home, Some("/home/tester".into()));
    }

    /// An explicit `HOME` entry in the request env suppresses the default.
    #[test]
    fn test_default_home_dir_respects_explicit_home_env() {
        let req = exec_request("/bin/true", &[], &["HOME=/tmp/custom"], None, false);
        let user = tester_user();

        assert!(default_home_dir(&req, Some(&user)).is_none());
    }

    /// Spawning a missing binary yields a typed failure whose message carries
    /// no probe internals.
    #[tokio::test]
    async fn test_spawn_pipe_error_does_not_include_probe_details() {
        // Keys of the old probe details that must stay out of the message.
        const PROBE_KEYS: [&str; 6] = [
            "symlink_metadata=",
            "metadata=",
            "magic=",
            "path_probe=",
            "cwd_probe=",
            "target_probe=",
        ];

        let (tx, _rx) = mpsc::unbounded_channel();
        let req = exec_request("/definitely/not/a/real/binary", &[], &[], None, false);

        let err = ExecSession::spawn(9, &req, tx, None).expect_err("spawn should fail");

        // Spawn failures surface as the typed `ExecSpawnFailed`; this test
        // expects ENOENT on the binary path to be classified as `NotFound`.
        let payload = match &err {
            AgentdError::ExecSpawnFailed(p) => p,
            other => panic!("expected ExecSpawnFailed, got: {other:?}"),
        };
        assert_eq!(payload.kind, ExecFailureKind::NotFound);
        assert_eq!(payload.errno, Some(libc::ENOENT));
        assert_eq!(payload.errno_name.as_deref(), Some("ENOENT"));

        // The message should mention the spawn step but must not leak any of
        // the probe-detail keys.
        let message = &payload.message;
        assert!(message.contains("spawn"));
        for key in PROBE_KEYS {
            assert!(!message.contains(key));
        }
    }
}