Skip to main content

microsandbox_agentd/
session.rs

1//! Exec session management: spawning processes with PTY or pipe I/O.
2
3use std::ffi::{CStr, CString};
4use std::mem::MaybeUninit;
5use std::os::fd::{AsRawFd, FromRawFd, OwnedFd, RawFd};
6use std::process::Stdio;
7use std::{iter, mem, ptr};
8
9use nix::pty;
10use nix::sys::signal::{self, Signal};
11use nix::unistd::Pid;
12use tokio::io::AsyncReadExt;
13use tokio::process::{Child, Command};
14use tokio::sync::mpsc;
15
16use microsandbox_protocol::exec::{ExecFailed, ExecFailureKind, ExecRequest};
17
18use crate::config::SecurityProfile;
19use crate::error::{AgentdError, AgentdResult};
20use crate::rlimit;
21
22//--------------------------------------------------------------------------------------------------
23// Constants
24//--------------------------------------------------------------------------------------------------
25
/// Version tag written into `CapUserHeader::version` for the `capget`/`capset` syscalls.
const LINUX_CAPABILITY_VERSION_3: u32 = 0x20080522;
/// Capability number of `CAP_SYS_ADMIN`, the capability removed by the restricted profile.
const CAP_SYS_ADMIN: u32 = 21;
/// Number of capability bits stored in each `CapUserData` word.
const CAP_WORD_BITS: u32 = 32;
/// `prctl` option used to drop a capability from the bounding set.
const PR_CAPBSET_DROP: libc::c_int = 24;
/// `prctl` option selecting ambient-capability operations.
const PR_CAP_AMBIENT: libc::c_int = 47;
/// Argument passed with `PR_CAP_AMBIENT` to clear all ambient capabilities.
const PR_CAP_AMBIENT_CLEAR_ALL: libc::c_int = 4;
/// User spec resolved to find a default `HOME` when no user is requested or configured.
const DEFAULT_USER_SPEC: &str = "0:0";
33
34//--------------------------------------------------------------------------------------------------
35// Functions: classify
36//--------------------------------------------------------------------------------------------------
37
38/// Map an `errno` integer to its standard symbolic name. Returns
39/// `None` for unrecognized values; we only enumerate the ones that
40/// can plausibly come out of fork/exec/setrlimit/setuid paths.
41fn errno_name(e: i32) -> Option<&'static str> {
42    match e {
43        libc::E2BIG => Some("E2BIG"),
44        libc::EACCES => Some("EACCES"),
45        libc::EAGAIN => Some("EAGAIN"),
46        libc::EBUSY => Some("EBUSY"),
47        libc::EFAULT => Some("EFAULT"),
48        libc::EINVAL => Some("EINVAL"),
49        libc::EIO => Some("EIO"),
50        libc::EISDIR => Some("EISDIR"),
51        libc::ELOOP => Some("ELOOP"),
52        libc::EMFILE => Some("EMFILE"),
53        libc::ENAMETOOLONG => Some("ENAMETOOLONG"),
54        libc::ENFILE => Some("ENFILE"),
55        libc::ENOENT => Some("ENOENT"),
56        libc::ENOEXEC => Some("ENOEXEC"),
57        libc::ENOMEM => Some("ENOMEM"),
58        libc::ENOSYS => Some("ENOSYS"),
59        libc::ENOTDIR => Some("ENOTDIR"),
60        libc::ENXIO => Some("ENXIO"),
61        libc::EPERM => Some("EPERM"),
62        libc::ETXTBSY => Some("ETXTBSY"),
63        _ => None,
64    }
65}
66
67/// Classify a fork/exec-time `errno` into one of the
68/// `ExecFailureKind` buckets.
69///
70/// ENOENT is ambiguous in principle (missing binary vs. missing
71/// cwd), but in practice it's overwhelmingly the binary — the cwd
72/// is set in `pre_exec` *before* execvp, and a bad cwd would more
73/// commonly produce ENOTDIR (path component isn't a directory) or
74/// EACCES (no permission to chdir). We classify ENOENT as
75/// `NotFound` and ENOTDIR as `BadCwd`. Edge cases of "bad cwd that
76/// happens to ENOENT" fall through with the message "spawn 'cmd':
77/// No such file or directory" which is still understandable.
78fn classify_spawn_errno(errno: i32) -> ExecFailureKind {
79    match errno {
80        libc::ENOENT => ExecFailureKind::NotFound,
81        libc::ENOTDIR => ExecFailureKind::BadCwd,
82        libc::EACCES | libc::EPERM => ExecFailureKind::PermissionDenied,
83        libc::ENOEXEC => ExecFailureKind::NotExecutable,
84        libc::EISDIR => ExecFailureKind::NotExecutable,
85        libc::ETXTBSY => ExecFailureKind::NotExecutable,
86        libc::E2BIG | libc::ELOOP | libc::ENAMETOOLONG | libc::EFAULT => ExecFailureKind::BadArgs,
87        libc::EMFILE | libc::ENFILE => ExecFailureKind::ResourceLimit,
88        libc::EAGAIN => ExecFailureKind::ResourceLimit,
89        libc::ENOMEM => ExecFailureKind::OutOfMemory,
90        libc::EINVAL => ExecFailureKind::Other,
91        _ => ExecFailureKind::Other,
92    }
93}
94
95/// Build a `ExecFailed` payload from a spawn-time `io::Error`.
96fn exec_failed_from_io_error(err: &std::io::Error, cmd: &str, stage: &str) -> ExecFailed {
97    let errno = err.raw_os_error();
98    let kind = errno
99        .map(classify_spawn_errno)
100        .unwrap_or(ExecFailureKind::Other);
101    let errno_name = errno.and_then(errno_name).map(str::to_string);
102    let message = format!("spawn {cmd:?}: {err}");
103    ExecFailed {
104        kind,
105        errno,
106        errno_name,
107        message,
108        stage: Some(stage.to_string()),
109    }
110}
111
112//--------------------------------------------------------------------------------------------------
113// Types
114//--------------------------------------------------------------------------------------------------
115
/// An active exec session handle for sending input to a running process.
///
/// Output reading is handled by a background task that sends events
/// via the `mpsc` channel provided at spawn time. This handle covers the
/// input side: stdin writes, PTY resizes, signals and closing stdin.
#[derive(Debug)]
pub struct ExecSession {
    /// The PID of the spawned process.
    pid: i32,

    /// The PTY master fd (only for PTY mode, used for writing and resize).
    /// `None` for pipe-mode sessions.
    pty_master: Option<OwnedFd>,

    /// The child's stdin (only for pipe mode).
    /// `None` for PTY sessions and after `close_stdin` has been called.
    stdin: Option<tokio::process::ChildStdin>,
}
131
/// Output from a session that the agent loop should forward to the host.
///
/// Values travel over the `mpsc` channel handed to `ExecSession::spawn`,
/// paired with the session id.
pub enum SessionOutput {
    /// Data from stdout (or PTY master).
    Stdout(Vec<u8>),

    /// Data from stderr (pipe mode only).
    Stderr(Vec<u8>),

    /// The process has exited with the given code.
    Exited(i32),

    /// Pre-encoded frame bytes to write directly to the serial output buffer.
    ///
    /// Used by filesystem streaming operations that encode their own
    /// `FsData`/`FsResponse` messages.
    Raw(Vec<u8>),
}
149
/// Identity a spawned process should switch to, fully resolved before the
/// child is created.
struct ResolvedUser {
    /// Numeric user id passed to `setuid`.
    uid: libc::uid_t,
    /// Numeric group id passed to `setgid` (and to `initgroups` as the base group).
    gid: libc::gid_t,
    /// Account name for `initgroups`; `None` when the uid has no passwd entry,
    /// in which case supplementary groups are cleared instead.
    initgroups_user: Option<CString>,
    /// Home directory from the passwd entry, if one was found and non-empty.
    home_dir: Option<CString>,
}
156
/// Owned copy of the passwd fields this module needs, extracted from a
/// `libc::passwd` lookup result.
struct PasswdEntry {
    /// Login name (`pw_name`), lossily converted to UTF-8.
    name: String,
    /// User id (`pw_uid`).
    uid: libc::uid_t,
    /// Primary group id (`pw_gid`).
    gid: libc::gid_t,
    /// Home directory (`pw_dir`); `None` when the entry has an empty one.
    home_dir: Option<String>,
}
163
/// The part of a group database entry this module needs: its numeric id.
struct GroupEntry {
    /// Group id (`gr_gid`).
    gid: libc::gid_t,
}
167
/// Pipe used by a forked PTY child to report an `errno` back to the parent.
struct ExecErrorPipe {
    /// End kept by the parent and read after the fork.
    read_end: OwnedFd,
    /// End kept by the child and written when `execvp` fails.
    write_end: OwnedFd,
}
172
/// Header argument for the raw `capget`/`capset` syscalls.
///
/// `#[repr(C)]` because the struct is handed to the kernel by pointer.
#[repr(C)]
#[derive(Clone, Copy)]
struct CapUserHeader {
    /// Capability ABI version; set to `LINUX_CAPABILITY_VERSION_3` by this module.
    version: u32,
    /// Target pid; this module passes 0.
    pid: libc::c_int,
}
179
/// One 32-bit word of capability sets for the raw `capget`/`capset` syscalls.
///
/// This module passes an array of two of these; each bit position in a word
/// corresponds to one capability number.
#[repr(C)]
#[derive(Clone, Copy)]
struct CapUserData {
    /// Bits of the effective capability set.
    effective: u32,
    /// Bits of the permitted capability set.
    permitted: u32,
    /// Bits of the inheritable capability set.
    inheritable: u32,
}
187
188//--------------------------------------------------------------------------------------------------
189// Methods
190//--------------------------------------------------------------------------------------------------
191
192impl ExecSession {
193    /// Spawns a new exec session.
194    ///
195    /// If `req.tty` is true, uses a PTY. Otherwise, uses piped stdin/stdout/stderr.
196    /// A background task is spawned to read output and send events via `tx`.
197    pub fn spawn(
198        id: u32,
199        req: &ExecRequest,
200        tx: mpsc::UnboundedSender<(u32, SessionOutput)>,
201        default_user: Option<&str>,
202        security_profile: SecurityProfile,
203    ) -> AgentdResult<Self> {
204        if req.tty {
205            Self::spawn_pty(id, req, tx, default_user, security_profile)
206        } else {
207            Self::spawn_pipe(id, req, tx, default_user, security_profile)
208        }
209    }
210
211    /// Returns the PID of the spawned process (as u32 for the protocol).
212    pub fn pid(&self) -> u32 {
213        self.pid as u32
214    }
215
216    /// Writes data to the process's stdin (or PTY master).
217    pub async fn write_stdin(&self, data: &[u8]) -> AgentdResult<()> {
218        if let Some(ref master) = self.pty_master {
219            blocking_write_fd(master.as_raw_fd(), data).await
220        } else if let Some(ref stdin) = self.stdin {
221            blocking_write_fd(stdin.as_raw_fd(), data).await
222        } else {
223            Ok(())
224        }
225    }
226
227    /// Resizes the PTY (only applicable for TTY sessions).
228    pub fn resize(&self, rows: u16, cols: u16) -> AgentdResult<()> {
229        if let Some(ref master) = self.pty_master {
230            let ws = libc::winsize {
231                ws_row: rows,
232                ws_col: cols,
233                ws_xpixel: 0,
234                ws_ypixel: 0,
235            };
236            let ret = unsafe { libc::ioctl(master.as_raw_fd(), libc::TIOCSWINSZ, &ws) };
237            if ret < 0 {
238                return Err(std::io::Error::last_os_error().into());
239            }
240        }
241        Ok(())
242    }
243
244    /// Sends a signal to the spawned process.
245    pub fn send_signal(&self, signum: i32) -> AgentdResult<()> {
246        let sig = Signal::try_from(signum)
247            .map_err(|e| AgentdError::ExecSession(format!("invalid signal {signum}: {e}")))?;
248        signal::kill(Pid::from_raw(self.pid), sig)?;
249        Ok(())
250    }
251
252    /// Closes the process's stdin.
253    ///
254    /// For pipe mode, drops the `ChildStdin` handle which closes the fd.
255    /// For PTY mode, this is a no-op (the PTY master stays open for output).
256    pub fn close_stdin(&mut self) {
257        self.stdin.take();
258    }
259}
260
261impl ExecSession {
262    /// Spawns a process with a PTY.
263    fn spawn_pty(
264        id: u32,
265        req: &ExecRequest,
266        tx: mpsc::UnboundedSender<(u32, SessionOutput)>,
267        default_user: Option<&str>,
268        security_profile: SecurityProfile,
269    ) -> AgentdResult<Self> {
270        let pty = pty::openpty(None, None)?;
271        let err_pipe = new_exec_error_pipe()?;
272
273        // Set initial window size.
274        let ws = libc::winsize {
275            ws_row: req.rows,
276            ws_col: req.cols,
277            ws_xpixel: 0,
278            ws_ypixel: 0,
279        };
280        let ret = unsafe { libc::ioctl(pty.master.as_raw_fd(), libc::TIOCSWINSZ, &ws) };
281        if ret < 0 {
282            return Err(std::io::Error::last_os_error().into());
283        }
284
285        let slave_fd = pty.slave.as_raw_fd();
286
287        // Pre-build all strings before fork to avoid allocating in the child.
288        let c_cmd = CString::new(req.cmd.as_str())
289            .map_err(|e| AgentdError::ExecSession(format!("invalid command: {e}")))?;
290        let mut c_args: Vec<CString> = vec![c_cmd.clone()];
291        for arg in &req.args {
292            c_args.push(
293                CString::new(arg.as_str())
294                    .map_err(|e| AgentdError::ExecSession(format!("invalid arg: {e}")))?,
295            );
296        }
297
298        // Build argv pointer array (null-terminated).
299        let argv_ptrs: Vec<*const libc::c_char> = c_args
300            .iter()
301            .map(|s| s.as_ptr())
302            .chain(iter::once(ptr::null()))
303            .collect();
304
305        // Pre-parse environment variables into CStrings.
306        let c_env: Vec<(CString, CString)> = req
307            .env
308            .iter()
309            .filter_map(|var| {
310                let (key, val) = var.split_once('=')?;
311                let k = CString::new(key).ok()?;
312                let v = CString::new(val).ok()?;
313                Some((k, v))
314            })
315            .collect();
316
317        // Pre-build cwd CString.
318        let c_cwd = req
319            .cwd
320            .as_ref()
321            .map(|dir| CString::new(dir.as_str()))
322            .transpose()
323            .map_err(|e| AgentdError::ExecSession(format!("invalid cwd: {e}")))?;
324
325        let resolved_user = resolve_requested_user(req, default_user)?;
326        let default_home = default_home_dir(req, resolved_user.as_ref())?;
327        let home_key = default_home
328            .as_ref()
329            .map(|_| {
330                CString::new("HOME")
331                    .map_err(|e| AgentdError::ExecSession(format!("invalid home env key: {e}")))
332            })
333            .transpose()?;
334
335        // Pre-parse rlimits before fork (no allocations in child).
336        let parsed_rlimits = rlimit::to_libc(&req.rlimits);
337
338        // Fork.
339        let pid = unsafe { libc::fork() };
340        if pid < 0 {
341            let io_err = std::io::Error::last_os_error();
342            return Err(AgentdError::ExecSpawnFailed(exec_failed_from_io_error(
343                &io_err, &req.cmd, "fork",
344            )));
345        }
346
347        #[allow(unreachable_code)]
348        if pid == 0 {
349            // Child process — only async-signal-safe operations from here.
350            drop(pty.master);
351            drop(err_pipe.read_end);
352
353            // Create new session.
354            if unsafe { libc::setsid() } < 0 {
355                unsafe { libc::_exit(1) };
356            }
357
358            // Set controlling terminal.
359            if unsafe { libc::ioctl(slave_fd, libc::TIOCSCTTY, 0) } < 0 {
360                unsafe { libc::_exit(1) };
361            }
362
363            // Dup slave to stdin/stdout/stderr.
364            unsafe {
365                if libc::dup2(slave_fd, 0) < 0 {
366                    libc::_exit(1);
367                }
368                if libc::dup2(slave_fd, 1) < 0 {
369                    libc::_exit(1);
370                }
371                if libc::dup2(slave_fd, 2) < 0 {
372                    libc::_exit(1);
373                }
374                if slave_fd > 2 {
375                    libc::close(slave_fd);
376                }
377            }
378
379            // Set environment variables using pre-built CStrings.
380            for (key, val) in &c_env {
381                unsafe {
382                    libc::setenv(key.as_ptr(), val.as_ptr(), 1);
383                }
384            }
385
386            // Set working directory.
387            if let Some(ref dir) = c_cwd {
388                unsafe {
389                    libc::chdir(dir.as_ptr());
390                }
391            }
392
393            if apply_exec_security_profile(security_profile).is_err() {
394                unsafe { libc::_exit(1) };
395            }
396
397            if let Some(ref user) = resolved_user
398                && apply_resolved_user(user).is_err()
399            {
400                unsafe { libc::_exit(1) };
401            }
402
403            if let (Some(key), Some(home)) = (&home_key, &default_home) {
404                unsafe {
405                    libc::setenv(key.as_ptr(), home.as_ptr(), 1);
406                }
407            }
408
409            // Apply resource limits.
410            for (resource, limit) in &parsed_rlimits {
411                if unsafe { libc::setrlimit(*resource as _, limit) } != 0 {
412                    unsafe { libc::_exit(1) };
413                }
414            }
415
416            // execvp — on success this never returns.
417            unsafe {
418                libc::execvp(argv_ptrs[0], argv_ptrs.as_ptr());
419            }
420
421            // If execvp returns, it failed.
422            write_exec_error_and_exit(err_pipe.write_end.as_raw_fd());
423        }
424
425        // Parent process.
426        drop(pty.slave);
427        drop(err_pipe.write_end);
428
429        if let Some(exec_errno) = read_exec_error(err_pipe.read_end.as_raw_fd())? {
430            let _ = wait_for_exec_failure_child(pid);
431            let io_err = std::io::Error::from_raw_os_error(exec_errno);
432            return Err(AgentdError::ExecSpawnFailed(exec_failed_from_io_error(
433                &io_err, &req.cmd, "execvp",
434            )));
435        }
436
437        // Dup the master fd for the reader task.
438        let reader_fd = unsafe { libc::dup(pty.master.as_raw_fd()) };
439        if reader_fd < 0 {
440            return Err(std::io::Error::last_os_error().into());
441        }
442        let reader_fd = unsafe { OwnedFd::from_raw_fd(reader_fd) };
443
444        // Spawn background reader task.
445        tokio::spawn(pty_reader_task(id, pid, reader_fd, tx));
446
447        Ok(Self {
448            pid,
449            pty_master: Some(pty.master),
450            stdin: None,
451        })
452    }
453
454    /// Spawns a process with piped stdio.
455    fn spawn_pipe(
456        id: u32,
457        req: &ExecRequest,
458        tx: mpsc::UnboundedSender<(u32, SessionOutput)>,
459        default_user: Option<&str>,
460        security_profile: SecurityProfile,
461    ) -> AgentdResult<Self> {
462        let mut cmd = Command::new(&req.cmd);
463        cmd.args(&req.args)
464            .stdin(Stdio::piped())
465            .stdout(Stdio::piped())
466            .stderr(Stdio::piped());
467
468        for var in &req.env {
469            if let Some((key, val)) = var.split_once('=') {
470                cmd.env(key, val);
471            }
472        }
473
474        if let Some(ref dir) = req.cwd {
475            cmd.current_dir(dir);
476        }
477
478        let resolved_user = resolve_requested_user(req, default_user)?;
479        if let Some(home) = default_home_dir(req, resolved_user.as_ref())? {
480            cmd.env("HOME", home.to_string_lossy().into_owned());
481        }
482
483        // Apply the security profile and resource limits in the child before exec.
484        let parsed_rlimits = rlimit::to_libc(&req.rlimits);
485        unsafe {
486            cmd.pre_exec(move || {
487                apply_exec_security_profile(security_profile).map_err(agentd_to_io_error)?;
488                if let Some(ref user) = resolved_user {
489                    apply_resolved_user(user).map_err(agentd_to_io_error)?;
490                }
491                for (resource, limit) in &parsed_rlimits {
492                    if libc::setrlimit(*resource as _, limit) != 0 {
493                        return Err(std::io::Error::last_os_error());
494                    }
495                }
496                Ok(())
497            });
498        }
499
500        let cmd_label = req.cmd.clone();
501        let mut child = cmd.spawn().map_err(|err| {
502            AgentdError::ExecSpawnFailed(exec_failed_from_io_error(
503                &err,
504                &cmd_label,
505                "Command::spawn",
506            ))
507        })?;
508        let pid = child.id().unwrap_or(0) as i32;
509        let stdin = child.stdin.take();
510        let stdout = child.stdout.take();
511        let stderr = child.stderr.take();
512
513        // Spawn background reader task.
514        tokio::spawn(pipe_reader_task(id, child, stdout, stderr, tx));
515
516        Ok(Self {
517            pid,
518            pty_master: None,
519            stdin,
520        })
521    }
522}
523
524//--------------------------------------------------------------------------------------------------
525// Functions
526//--------------------------------------------------------------------------------------------------
527
528fn new_exec_error_pipe() -> AgentdResult<ExecErrorPipe> {
529    let mut fds = [0; 2];
530    let ret = unsafe { libc::pipe2(fds.as_mut_ptr(), libc::O_CLOEXEC) };
531    if ret != 0 {
532        return Err(std::io::Error::last_os_error().into());
533    }
534
535    Ok(ExecErrorPipe {
536        read_end: unsafe { OwnedFd::from_raw_fd(fds[0]) },
537        write_end: unsafe { OwnedFd::from_raw_fd(fds[1]) },
538    })
539}
540
541fn write_exec_error_and_exit(err_fd: RawFd) -> ! {
542    let errno = unsafe { *libc::__errno_location() };
543    let bytes = errno.to_ne_bytes();
544    let _ = unsafe { libc::write(err_fd, bytes.as_ptr() as *const libc::c_void, bytes.len()) };
545    unsafe { libc::_exit(127) }
546}
547
548fn read_exec_error(err_fd: RawFd) -> AgentdResult<Option<i32>> {
549    let mut buf = [0u8; mem::size_of::<i32>()];
550    let n = unsafe { libc::read(err_fd, buf.as_mut_ptr() as *mut libc::c_void, buf.len()) };
551    if n < 0 {
552        return Err(std::io::Error::last_os_error().into());
553    }
554    if n == 0 {
555        return Ok(None);
556    }
557    if n as usize != buf.len() {
558        return Err(AgentdError::ExecSession(format!(
559            "short exec error report: expected {} bytes, got {n}",
560            buf.len()
561        )));
562    }
563    Ok(Some(i32::from_ne_bytes(buf)))
564}
565
566fn wait_for_exec_failure_child(pid: i32) -> AgentdResult<()> {
567    let ret = unsafe { libc::waitpid(pid, ptr::null_mut(), 0) };
568    if ret < 0 {
569        return Err(std::io::Error::last_os_error().into());
570    }
571    Ok(())
572}
573
574fn apply_exec_security_profile(profile: SecurityProfile) -> AgentdResult<()> {
575    match profile {
576        SecurityProfile::Default => Ok(()),
577        SecurityProfile::Restricted => drop_mount_admin_privileges(),
578    }
579}
580
/// Removes `CAP_SYS_ADMIN` from the calling process.
///
/// Steps, in order:
/// 1. set `no_new_privs`;
/// 2. clear all ambient capabilities;
/// 3. clear the `CAP_SYS_ADMIN` bit from the effective, permitted and
///    inheritable sets (only if it was present in any of them);
/// 4. drop `CAP_SYS_ADMIN` from the bounding set.
///
/// Returns the OS error of the first step that fails, except for the
/// tolerated cases noted inline.
fn drop_mount_admin_privileges() -> AgentdResult<()> {
    if unsafe { libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) } != 0 {
        return Err(std::io::Error::last_os_error().into());
    }

    let ret = unsafe { libc::prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0) };
    if ret != 0 {
        let err = std::io::Error::last_os_error();
        // EINVAL is tolerated here — presumably for kernels that do not know
        // the ambient-capability prctl; TODO confirm.
        if err.raw_os_error() != Some(libc::EINVAL) {
            return Err(err.into());
        }
    }

    // pid 0 in the header, with two data words for the version-3 layout.
    let mut header = CapUserHeader {
        version: LINUX_CAPABILITY_VERSION_3,
        pid: 0,
    };
    let mut data = [CapUserData {
        effective: 0,
        permitted: 0,
        inheritable: 0,
    }; 2];

    if unsafe { libc::syscall(libc::SYS_capget, &mut header, data.as_mut_ptr()) } != 0 {
        return Err(std::io::Error::last_os_error().into());
    }

    // Locate the word and bit that hold CAP_SYS_ADMIN.
    let index = (CAP_SYS_ADMIN / CAP_WORD_BITS) as usize;
    let mask = 1u32 << (CAP_SYS_ADMIN % CAP_WORD_BITS);
    let had_sys_admin = data[index].effective & mask != 0
        || data[index].permitted & mask != 0
        || data[index].inheritable & mask != 0;

    // Only call capset when there is actually a bit to clear.
    if had_sys_admin {
        data[index].effective &= !mask;
        data[index].permitted &= !mask;
        data[index].inheritable &= !mask;

        if unsafe { libc::syscall(libc::SYS_capset, &mut header, data.as_ptr()) } != 0 {
            return Err(std::io::Error::last_os_error().into());
        }
    }

    // NOTE(review): the prctl arguments are passed as 32-bit integers through
    // a variadic call; confirm whether they should be cast to `c_ulong`.
    let ret = unsafe { libc::prctl(PR_CAPBSET_DROP, CAP_SYS_ADMIN, 0, 0, 0) };
    if ret != 0 {
        let err = std::io::Error::last_os_error();
        let errno = err.raw_os_error();
        // Already-unprivileged callers may also lack CAP_SETPCAP for the bounding-set drop.
        let already_unprivileged = !had_sys_admin && errno == Some(libc::EPERM);
        // EINVAL is tolerated as well, mirroring the ambient-clear step above.
        if errno != Some(libc::EINVAL) && !already_unprivileged {
            return Err(err.into());
        }
    }

    Ok(())
}
637
638pub(crate) fn resolve_default_user(default_user: Option<&str>) -> AgentdResult<(u32, u32)> {
639    let Some(spec) = default_user
640        .map(str::trim)
641        .filter(|value| !value.is_empty())
642    else {
643        return Ok((0, 0));
644    };
645
646    let resolved = resolve_user_spec(spec)?;
647    Ok((resolved.uid, resolved.gid))
648}
649
650fn resolve_requested_user(
651    req: &ExecRequest,
652    default_user: Option<&str>,
653) -> AgentdResult<Option<ResolvedUser>> {
654    let default_user = default_user
655        .map(str::trim)
656        .filter(|value| !value.is_empty());
657    let requested = req
658        .user
659        .as_deref()
660        .map(str::trim)
661        .filter(|value| !value.is_empty())
662        .or(default_user);
663
664    requested.map(resolve_user_spec).transpose()
665}
666
667fn resolve_user_spec(spec: &str) -> AgentdResult<ResolvedUser> {
668    let (user_part, group_part) = match spec.split_once(':') {
669        Some((user, group)) => (user.trim(), Some(group.trim())),
670        None => (spec.trim(), None),
671    };
672
673    if user_part.is_empty() {
674        return Err(AgentdError::ExecSession("user spec has empty user".into()));
675    }
676
677    let passwd = if let Ok(uid) = parse_id(user_part) {
678        lookup_passwd_by_uid(uid)?
679    } else {
680        lookup_passwd_by_name(user_part)?
681            .ok_or_else(|| AgentdError::ExecSession(format!("guest user not found: {user_part}")))?
682            .into()
683    };
684
685    let (uid, passwd_entry) = match passwd {
686        ResolvedUserLookup::Known(entry) => (entry.uid, Some(entry)),
687        ResolvedUserLookup::Numeric(uid) => (uid, None),
688    };
689
690    let gid = match group_part {
691        Some("") => {
692            return Err(AgentdError::ExecSession("user spec has empty group".into()));
693        }
694        Some(group) => resolve_group_spec(group)?,
695        None => passwd_entry
696            .as_ref()
697            .map(|entry| entry.gid)
698            .unwrap_or_else(|| unsafe { libc::getgid() }),
699    };
700
701    let initgroups_user = passwd_entry
702        .as_ref()
703        .map(|entry| CString::new(entry.name.as_str()))
704        .transpose()
705        .map_err(|e| AgentdError::ExecSession(format!("invalid guest user name: {e}")))?;
706
707    Ok(ResolvedUser {
708        uid,
709        gid,
710        initgroups_user,
711        home_dir: passwd_entry
712            .as_ref()
713            .and_then(|entry| entry.home_dir.as_deref())
714            .map(CString::new)
715            .transpose()
716            .map_err(|e| AgentdError::ExecSession(format!("invalid guest home directory: {e}")))?,
717    })
718}
719
/// Outcome of looking a user up in the passwd database.
enum ResolvedUserLookup {
    /// A passwd entry was found.
    Known(PasswdEntry),
    /// No passwd entry exists for this numeric uid; only the uid is known.
    Numeric(libc::uid_t),
}
724
725impl From<PasswdEntry> for ResolvedUserLookup {
726    fn from(value: PasswdEntry) -> Self {
727        Self::Known(value)
728    }
729}
730
731fn resolve_group_spec(spec: &str) -> AgentdResult<libc::gid_t> {
732    if let Ok(gid) = parse_id(spec) {
733        return Ok(gid);
734    }
735
736    lookup_group_by_name(spec)?
737        .map(|entry| entry.gid)
738        .ok_or_else(|| AgentdError::ExecSession(format!("guest group not found: {spec}")))
739}
740
/// Parses a decimal uid/gid string into a `u32`.
///
/// Fails for empty input, non-digit characters, negative numbers and values
/// that do not fit in a `u32`.
fn parse_id(value: &str) -> Result<u32, std::num::ParseIntError> {
    // The target type is inferred from the return type.
    value.parse()
}
744
745fn lookup_passwd_by_name(name: &str) -> AgentdResult<Option<PasswdEntry>> {
746    let name = CString::new(name)
747        .map_err(|e| AgentdError::ExecSession(format!("invalid guest user name: {e}")))?;
748    let mut pwd = MaybeUninit::<libc::passwd>::uninit();
749    let mut result = ptr::null_mut();
750    let mut buf = vec![0u8; lookup_buffer_len()];
751    let rc = unsafe {
752        libc::getpwnam_r(
753            name.as_ptr(),
754            pwd.as_mut_ptr(),
755            buf.as_mut_ptr().cast(),
756            buf.len(),
757            &mut result,
758        )
759    };
760    if rc != 0 {
761        return Err(AgentdError::ExecSession(format!(
762            "failed to resolve guest user {name:?}: {}",
763            std::io::Error::from_raw_os_error(rc)
764        )));
765    }
766    if result.is_null() {
767        return Ok(None);
768    }
769
770    let pwd = unsafe { pwd.assume_init() };
771    let name = unsafe { CStr::from_ptr(pwd.pw_name) }
772        .to_string_lossy()
773        .into_owned();
774    let home_dir = unsafe { CStr::from_ptr(pwd.pw_dir) }
775        .to_string_lossy()
776        .into_owned();
777    Ok(Some(PasswdEntry {
778        name,
779        uid: pwd.pw_uid,
780        gid: pwd.pw_gid,
781        home_dir: (!home_dir.is_empty()).then_some(home_dir),
782    }))
783}
784
785fn lookup_passwd_by_uid(uid: libc::uid_t) -> AgentdResult<ResolvedUserLookup> {
786    let mut pwd = MaybeUninit::<libc::passwd>::uninit();
787    let mut result = ptr::null_mut();
788    let mut buf = vec![0u8; lookup_buffer_len()];
789    let rc = unsafe {
790        libc::getpwuid_r(
791            uid,
792            pwd.as_mut_ptr(),
793            buf.as_mut_ptr().cast(),
794            buf.len(),
795            &mut result,
796        )
797    };
798    if rc != 0 {
799        return Err(AgentdError::ExecSession(format!(
800            "failed to resolve guest uid {uid}: {}",
801            std::io::Error::from_raw_os_error(rc)
802        )));
803    }
804    if result.is_null() {
805        return Ok(ResolvedUserLookup::Numeric(uid));
806    }
807
808    let pwd = unsafe { pwd.assume_init() };
809    let name = unsafe { CStr::from_ptr(pwd.pw_name) }
810        .to_string_lossy()
811        .into_owned();
812    let home_dir = unsafe { CStr::from_ptr(pwd.pw_dir) }
813        .to_string_lossy()
814        .into_owned();
815    Ok(ResolvedUserLookup::Known(PasswdEntry {
816        name,
817        uid: pwd.pw_uid,
818        gid: pwd.pw_gid,
819        home_dir: (!home_dir.is_empty()).then_some(home_dir),
820    }))
821}
822
823fn lookup_group_by_name(name: &str) -> AgentdResult<Option<GroupEntry>> {
824    let name = CString::new(name)
825        .map_err(|e| AgentdError::ExecSession(format!("invalid guest group name: {e}")))?;
826    let mut grp = MaybeUninit::<libc::group>::uninit();
827    let mut result = ptr::null_mut();
828    let mut buf = vec![0u8; lookup_buffer_len()];
829    let rc = unsafe {
830        libc::getgrnam_r(
831            name.as_ptr(),
832            grp.as_mut_ptr(),
833            buf.as_mut_ptr().cast(),
834            buf.len(),
835            &mut result,
836        )
837    };
838    if rc != 0 {
839        return Err(AgentdError::ExecSession(format!(
840            "failed to resolve guest group {name:?}: {}",
841            std::io::Error::from_raw_os_error(rc)
842        )));
843    }
844    if result.is_null() {
845        return Ok(None);
846    }
847
848    let grp = unsafe { grp.assume_init() };
849    Ok(Some(GroupEntry { gid: grp.gr_gid }))
850}
851
852fn lookup_buffer_len() -> usize {
853    let size = unsafe { libc::sysconf(libc::_SC_GETPW_R_SIZE_MAX) };
854    if size > 0 { size as usize } else { 16 * 1024 }
855}
856
857fn apply_resolved_user(user: &ResolvedUser) -> AgentdResult<()> {
858    if let Some(ref name) = user.initgroups_user {
859        if unsafe { libc::initgroups(name.as_ptr(), user.gid) } != 0 {
860            return Err(std::io::Error::last_os_error().into());
861        }
862    } else if unsafe { libc::setgroups(0, ptr::null()) } != 0 {
863        return Err(std::io::Error::last_os_error().into());
864    }
865
866    if unsafe { libc::setgid(user.gid) } != 0 {
867        return Err(std::io::Error::last_os_error().into());
868    }
869    if unsafe { libc::setuid(user.uid) } != 0 {
870        return Err(std::io::Error::last_os_error().into());
871    }
872
873    Ok(())
874}
875
876fn default_home_dir(
877    req: &ExecRequest,
878    user: Option<&ResolvedUser>,
879) -> AgentdResult<Option<CString>> {
880    if env_contains_key(&req.env, "HOME") {
881        return Ok(None);
882    }
883
884    if let Some(user) = user {
885        return Ok(user.home_dir.clone());
886    }
887
888    Ok(resolve_user_spec(DEFAULT_USER_SPEC)?.home_dir)
889}
890
/// Reports whether `env` has a `KEY=value` entry whose key equals `key`.
///
/// Entries without an `=` are ignored, and only the text before the first
/// `=` is treated as the key.
fn env_contains_key(env: &[String], key: &str) -> bool {
    env.iter()
        .filter_map(|entry| entry.split_once('='))
        .any(|(entry_key, _)| entry_key == key)
}
899
900fn agentd_to_io_error(err: AgentdError) -> std::io::Error {
901    std::io::Error::other(err.to_string())
902}
903
904/// Writes data to a raw fd using a blocking task, handling short writes.
905async fn blocking_write_fd(fd: RawFd, data: &[u8]) -> AgentdResult<()> {
906    let data = data.to_vec();
907    tokio::task::spawn_blocking(move || {
908        let mut written = 0;
909        while written < data.len() {
910            let ptr = unsafe { data.as_ptr().add(written) as *const libc::c_void };
911            let ret = unsafe { libc::write(fd, ptr, data.len() - written) };
912            if ret < 0 {
913                let err = std::io::Error::last_os_error();
914                let code = err.raw_os_error();
915                if code == Some(libc::EAGAIN) || code == Some(libc::EWOULDBLOCK) {
916                    wait_fd_writable(fd)?;
917                    continue;
918                }
919                if code == Some(libc::EINTR) {
920                    continue;
921                }
922                return Err(AgentdError::Io(err));
923            }
924            if ret == 0 {
925                wait_fd_writable(fd)?;
926                continue;
927            }
928            written += ret as usize;
929        }
930        Ok(())
931    })
932    .await
933    .map_err(|e| AgentdError::ExecSession(format!("stdin write join error: {e}")))?
934}
935
936fn wait_fd_writable(fd: RawFd) -> AgentdResult<()> {
937    let mut pollfd = libc::pollfd {
938        fd,
939        events: libc::POLLOUT,
940        revents: 0,
941    };
942
943    loop {
944        let ret = unsafe { libc::poll(&mut pollfd, 1, -1) };
945        if ret < 0 {
946            let err = std::io::Error::last_os_error();
947            if err.raw_os_error() == Some(libc::EINTR) {
948                continue;
949            }
950            return Err(AgentdError::Io(err));
951        }
952        if ret == 0 {
953            continue;
954        }
955        // Any positive return means the fd is actionable: POLLOUT lets the
956        // next write make progress, and POLLHUP/POLLERR/POLLNVAL will cause
957        // the next write to fail with a real errno (typically EPIPE) which
958        // is more meaningful than poll's revents.
959        return Ok(());
960    }
961}
962
963/// Background task that reads from a PTY master fd and sends output events.
964async fn pty_reader_task(
965    id: u32,
966    pid: i32,
967    master_fd: OwnedFd,
968    tx: mpsc::UnboundedSender<(u32, SessionOutput)>,
969) {
970    let tx_output = tx.clone();
971    let read_result = tokio::task::spawn_blocking(move || {
972        // PTY masters are safer with a dedicated blocking read loop than with
973        // edge-driven readiness. Fast writers followed by process exit can
974        // strand the tail behind a missed wakeup/HUP transition.
975        let raw = master_fd.as_raw_fd();
976        let flags = unsafe { libc::fcntl(raw, libc::F_GETFL) };
977        if flags >= 0 {
978            unsafe { libc::fcntl(raw, libc::F_SETFL, flags & !libc::O_NONBLOCK) };
979        }
980
981        loop {
982            let mut buf = [0u8; 4096];
983            let n = unsafe { libc::read(raw, buf.as_mut_ptr() as *mut libc::c_void, buf.len()) };
984
985            if n > 0 {
986                if tx_output
987                    .send((id, SessionOutput::Stdout(buf[..n as usize].to_vec())))
988                    .is_err()
989                {
990                    break;
991                }
992                continue;
993            }
994
995            if n == 0 {
996                break;
997            }
998
999            let err = std::io::Error::last_os_error();
1000            match err.raw_os_error() {
1001                Some(libc::EINTR) => continue,
1002                Some(libc::EIO) => break,
1003                _ => break,
1004            }
1005        }
1006    })
1007    .await;
1008
1009    let _ = read_result;
1010
1011    let code = wait_for_pid(pid).await;
1012    let _ = tx.send((id, SessionOutput::Exited(code)));
1013}
1014
1015/// Background task that reads from piped stdout/stderr and sends output events.
1016async fn pipe_reader_task(
1017    id: u32,
1018    mut child: Child,
1019    stdout: Option<tokio::process::ChildStdout>,
1020    stderr: Option<tokio::process::ChildStderr>,
1021    tx: mpsc::UnboundedSender<(u32, SessionOutput)>,
1022) {
1023    let mut stdout = stdout;
1024    let mut stderr = stderr;
1025    let mut stdout_eof = stdout.is_none();
1026    let mut stderr_eof = stderr.is_none();
1027
1028    while !stdout_eof || !stderr_eof {
1029        let mut stdout_buf = [0u8; 4096];
1030        let mut stderr_buf = [0u8; 4096];
1031
1032        tokio::select! {
1033            result = async {
1034                match stdout.as_mut() {
1035                    Some(out) => out.read(&mut stdout_buf).await,
1036                    None => std::future::pending().await,
1037                }
1038            }, if !stdout_eof => {
1039                match result {
1040                    Ok(0) | Err(_) => {
1041                        stdout = None;
1042                        stdout_eof = true;
1043                    }
1044                    Ok(n) => {
1045                        let _ = tx.send((id, SessionOutput::Stdout(stdout_buf[..n].to_vec())));
1046                    }
1047                }
1048            }
1049            result = async {
1050                match stderr.as_mut() {
1051                    Some(err) => err.read(&mut stderr_buf).await,
1052                    None => std::future::pending().await,
1053                }
1054            }, if !stderr_eof => {
1055                match result {
1056                    Ok(0) | Err(_) => {
1057                        stderr = None;
1058                        stderr_eof = true;
1059                    }
1060                    Ok(n) => {
1061                        let _ = tx.send((id, SessionOutput::Stderr(stderr_buf[..n].to_vec())));
1062                    }
1063                }
1064            }
1065        }
1066    }
1067
1068    // Both streams are done — wait for process exit.
1069    let code = match child.wait().await {
1070        Ok(status) => status.code().unwrap_or(-1),
1071        Err(_) => -1,
1072    };
1073
1074    let _ = tx.send((id, SessionOutput::Exited(code)));
1075}
1076
1077/// Waits for a process to exit by PID and returns the exit code.
1078async fn wait_for_pid(pid: i32) -> i32 {
1079    tokio::task::spawn_blocking(move || {
1080        let mut status: i32 = 0;
1081        unsafe {
1082            libc::waitpid(pid, &mut status, 0);
1083        }
1084        if libc::WIFEXITED(status) {
1085            libc::WEXITSTATUS(status)
1086        } else {
1087            -1
1088        }
1089    })
1090    .await
1091    .unwrap_or(-1)
1092}
1093
1094//--------------------------------------------------------------------------------------------------
1095// Tests
1096//--------------------------------------------------------------------------------------------------
1097
#[cfg(test)]
mod tests {
    use std::time::Duration;

    use tokio::time;

    use microsandbox_protocol::exec::ExecRequest;

    use super::*;

    /// Baseline request shared by the tests: runs `cmd` with no arguments, an
    /// empty environment, no cwd/user override, pipes instead of a TTY, a
    /// 24x80 window and no rlimits. Tests override individual fields with
    /// struct-update syntax.
    fn exec_request(cmd: &str) -> ExecRequest {
        ExecRequest {
            cmd: cmd.to_string(),
            args: Vec::new(),
            env: Vec::new(),
            cwd: None,
            user: None,
            tty: false,
            rows: 24,
            cols: 80,
            rlimits: Vec::new(),
        }
    }

    /// Fixture user (uid/gid 1000) whose home directory is `/home/tester`.
    fn tester_user() -> ResolvedUser {
        ResolvedUser {
            uid: 1000,
            gid: 1000,
            initgroups_user: None,
            home_dir: Some(CString::new("/home/tester").unwrap()),
        }
    }

    /// Returns the offset of the first occurrence of `needle` in `haystack`.
    fn find_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
        haystack
            .windows(needle.len())
            .position(|window| window == needle)
    }

    #[tokio::test]
    async fn test_pty_reader_drains_ready_fd() {
        let (tx, mut rx) = mpsc::unbounded_channel();
        let req = ExecRequest {
            args: vec![
                "-c".to_string(),
                "i=0; while [ $i -lt 256 ]; do printf AAAA; i=$((i+1)); done; printf SECOND; sleep 0.1; printf '<END>\\n'; sleep 0.1; exit 0"
                    .to_string(),
            ],
            env: vec!["PATH=/usr/local/bin:/usr/bin:/bin".to_string()],
            tty: true,
            ..exec_request("/bin/sh")
        };

        let session = ExecSession::spawn(7, &req, tx, None, SecurityProfile::Default)
            .expect("spawn pty session");
        let mut stdout = Vec::new();
        let mut exit = None;

        // Collect output until the session reports its exit, bounded by a
        // generous timeout so a hung reader fails the test instead of CI.
        let recv_result = time::timeout(Duration::from_secs(15), async {
            while let Some((id, output)) = rx.recv().await {
                assert_eq!(id, 7);
                match output {
                    SessionOutput::Stdout(data) => stdout.extend_from_slice(&data),
                    SessionOutput::Exited(code) => {
                        exit = Some(code);
                        break;
                    }
                    SessionOutput::Stderr(_) | SessionOutput::Raw(_) => {}
                }
            }
        })
        .await;

        if recv_result.is_err() {
            let _ = session.send_signal(libc::SIGKILL);
            panic!("timed out waiting for PTY output");
        }

        assert_eq!(exit, Some(0));

        let second = find_bytes(&stdout, b"SECOND");
        let end = find_bytes(&stdout, b"<END>");

        assert!(
            matches!((second, end), (Some(second), Some(end)) if second < end),
            "expected immediate PTY write to arrive before later output; got {:?}",
            String::from_utf8_lossy(&stdout),
        );
    }

    #[test]
    fn test_resolve_user_spec_for_current_uid_gid() {
        let uid = unsafe { libc::getuid() };
        let gid = unsafe { libc::getgid() };
        let resolved = resolve_user_spec(&format!("{uid}:{gid}")).expect("resolve numeric user");
        assert_eq!(resolved.uid, uid);
        assert_eq!(resolved.gid, gid);
    }

    #[test]
    fn test_request_user_overrides_config_default() {
        let req = ExecRequest {
            user: Some("1:1".to_string()),
            ..exec_request("/bin/true")
        };

        let resolved = resolve_requested_user(&req, Some("0:0")).expect("resolve requested user");
        assert_eq!(resolved.unwrap().uid, 1);
    }

    #[test]
    fn test_config_default_user_used_when_request_has_none() {
        let req = exec_request("/bin/true");

        let uid = unsafe { libc::getuid() };
        let gid = unsafe { libc::getgid() };
        let resolved = resolve_requested_user(&req, Some(&format!("{uid}:{gid}")))
            .expect("resolve with config default");
        let resolved = resolved.expect("should resolve to a user");
        assert_eq!(resolved.uid, uid);
        assert_eq!(resolved.gid, gid);
    }

    #[test]
    fn test_request_without_user_does_not_apply_user_switch() {
        let req = exec_request("/bin/true");

        let resolved = resolve_requested_user(&req, None).expect("resolve absent user");
        assert!(resolved.is_none());
    }

    #[test]
    fn test_default_user_absent_resolves_to_root() {
        let resolved = resolve_default_user(None).expect("resolve absent default user");
        assert_eq!(resolved, (0, 0));
    }

    #[test]
    fn test_default_home_dir_uses_resolved_user_home() {
        let req = exec_request("/bin/true");
        let user = tester_user();

        assert_eq!(
            default_home_dir(&req, Some(&user))
                .expect("resolve default home")
                .as_deref()
                .map(CStr::to_string_lossy),
            Some("/home/tester".into()),
        );
    }

    #[test]
    fn test_default_home_dir_uses_root_when_user_absent() {
        let req = exec_request("/bin/true");
        let root = resolve_user_spec(DEFAULT_USER_SPEC).expect("resolve implicit root");

        assert_eq!(
            default_home_dir(&req, None)
                .expect("resolve default home")
                .as_deref()
                .map(CStr::to_string_lossy),
            root.home_dir.as_deref().map(CStr::to_string_lossy),
        );
    }

    #[test]
    fn test_default_home_dir_respects_explicit_home_env() {
        let req = ExecRequest {
            env: vec!["HOME=/tmp/custom".to_string()],
            ..exec_request("/bin/true")
        };
        let user = tester_user();

        assert!(
            default_home_dir(&req, Some(&user))
                .expect("resolve default home")
                .is_none()
        );
    }

    #[tokio::test]
    async fn test_spawn_pipe_error_does_not_include_probe_details() {
        let (tx, _rx) = mpsc::unbounded_channel();
        let req = exec_request("/definitely/not/a/real/binary");

        let err = ExecSession::spawn(9, &req, tx, None, SecurityProfile::Default)
            .expect_err("spawn should fail");

        // Spawn failures produce the typed `ExecSpawnFailed` so the host can
        // render a useful message + hint. The classifier maps ENOENT on the
        // binary path to `NotFound`.
        let payload = match &err {
            AgentdError::ExecSpawnFailed(p) => p,
            other => panic!("expected ExecSpawnFailed, got: {other:?}"),
        };
        assert_eq!(payload.kind, ExecFailureKind::NotFound);
        assert_eq!(payload.errno, Some(libc::ENOENT));
        assert_eq!(payload.errno_name.as_deref(), Some("ENOENT"));

        // The original intent of the test: probe internals must NOT leak into
        // the error message. The format is now `spawn "<cmd>": <io::Error>`
        // from `exec_failed_from_io_error`. Verify that none of the old
        // probe-detail keys snuck back into the message.
        let message = &payload.message;
        assert!(message.contains("spawn"));
        assert!(!message.contains("symlink_metadata="));
        assert!(!message.contains("metadata="));
        assert!(!message.contains("magic="));
        assert!(!message.contains("path_probe="));
        assert!(!message.contains("cwd_probe="));
        assert!(!message.contains("target_probe="));
    }
}