// cap_std_ext/cmdext.rs

//! Extensions for [`std::process::Command`] that operate on concepts from cap-std.
//!
//! The key APIs here are:
//!
//! - File descriptor passing
//! - Changing to a file-descriptor relative directory
//! - Systemd socket activation fd passing

use cap_std::fs::Dir;
use cap_std::io_lifetimes;
use cap_tempfile::cap_std;
use io_lifetimes::OwnedFd;
use rustix::fd::{AsFd, FromRawFd, IntoRawFd};
use rustix::io::FdFlags;
use std::collections::BTreeSet;
use std::ffi::CString;
use std::os::fd::AsRawFd;
use std::os::unix::process::CommandExt;
use std::sync::Arc;

/// First file descriptor number used for systemd socket activation.
///
/// Sockets handed to an activated service occupy consecutive descriptors
/// beginning at this value. See `sd_listen_fds(3)`.
const SD_LISTEN_FDS_START: i32 = 3;

/// A validated name for a systemd socket-activation file descriptor.
///
/// Names appear in the `LISTEN_FDNAMES` environment variable as
/// colon-separated values. The constructor enforces the same rules as
/// systemd's `fdname_is_valid()`: at most 255 bytes, each of which is an
/// ASCII character in the range `' '..='~'` other than `:`. The empty
/// string is accepted.
///
/// Both [`new`](Self::new) and [`as_str`](Self::as_str) are `const fn`, so a
/// name can be validated and read back at compile time.
///
/// ```
/// use cap_std_ext::cmdext::SystemdFdName;
/// const NAME: SystemdFdName<'static> = SystemdFdName::new("varlink");
/// assert_eq!(NAME.as_str(), "varlink");
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct SystemdFdName<'a>(&'a str);

impl<'a> SystemdFdName<'a> {
    /// Longest accepted name, in bytes.
    const MAX_LEN: usize = 255;

    /// Create a new `SystemdFdName`, panicking if `name` is invalid.
    ///
    /// When evaluated in a const context an invalid name is reported as a
    /// compile-time error instead of a runtime panic.
    ///
    /// # Panics
    ///
    /// Panics if `name` is longer than 255 bytes or contains any
    /// character that is not printable ASCII (i.e. control characters,
    /// DEL, non-ASCII bytes, or `:`).
    pub const fn new(name: &'a str) -> Self {
        assert!(
            name.len() <= Self::MAX_LEN,
            "systemd fd name must be at most 255 characters"
        );
        let bytes = name.as_bytes();
        // `for` loops and iterator adaptors are not usable in a `const fn`,
        // hence the manual index loop.
        let mut i = 0;
        while i < bytes.len() {
            let b = bytes[i];
            assert!(
                // `' '..='~'` is exactly the byte range 0x20..=0x7e.
                matches!(b, b' '..=b'~') && b != b':',
                "systemd fd name must only contain printable ASCII characters except ':'"
            );
            i += 1;
        }
        Self(name)
    }

    /// Return the name as a string slice.
    ///
    /// The returned slice borrows from the original input (lifetime `'a`),
    /// not from this wrapper.
    pub const fn as_str(&self) -> &'a str {
        self.0
    }
}

/// Plan of which file descriptors a child process receives, and at which
/// fd numbers.
///
/// Assignments (and, optionally, systemd socket-activation settings) are
/// accumulated here and then applied together by
/// [`CapStdExtCommandExt::take_fds`].
///
/// - [`new_systemd_fds`](Self::new_systemd_fds) builds an allocator whose
///   first entries are socket-activation fds at 3, 4, … (`SD_LISTEN_FDS_START`).
/// - [`take_fd`](Self::take_fd) chooses the number automatically: the next
///   one above everything assigned so far (minimum 3).
/// - [`take_fd_n`](Self::take_fd_n) uses a caller-chosen number and panics
///   if that number is already assigned.
///
/// ```no_run
/// # use std::sync::Arc;
/// # use cap_std_ext::cmdext::{CmdFds, CapStdExtCommandExt, SystemdFdName};
/// # let varlink_fd: Arc<rustix::fd::OwnedFd> = todo!();
/// # let extra_fd: Arc<rustix::fd::OwnedFd> = todo!();
/// let mut cmd = std::process::Command::new("myservice");
/// let mut fds = CmdFds::new_systemd_fds([(varlink_fd, SystemdFdName::new("varlink"))]);
/// let extra_n = fds.take_fd(extra_fd);
/// cmd.take_fds(fds);
/// ```
#[derive(Debug)]
pub struct CmdFds {
    /// Every child-side fd number handed out so far; used to reject
    /// duplicates and to find the next free number.
    taken: BTreeSet<i32>,
    /// `(child fd number, source descriptor)` pairs in registration order.
    fds: Vec<(i32, Arc<OwnedFd>)>,
    /// Pre-built values for `LISTEN_FDS` and `LISTEN_FDNAMES` (in that
    /// order); populated only by `new_systemd_fds`.
    systemd_env: Option<(CString, CString)>,
}

102impl Default for CmdFds {
103    fn default() -> Self {
104        Self::new()
105    }
106}
107
108impl CmdFds {
109    /// Create a new fd allocator.
110    pub fn new() -> Self {
111        Self {
112            taken: BTreeSet::new(),
113            fds: Vec::new(),
114            systemd_env: None,
115        }
116    }
117
118    /// Create a new fd allocator with systemd socket-activation fds.
119    ///
120    /// Each `(fd, name)` pair is assigned a consecutive fd number starting
121    /// at `SD_LISTEN_FDS_START` (3). The `LISTEN_PID`, `LISTEN_FDS`, and
122    /// `LISTEN_FDNAMES` environment variables will be set in the child
123    /// when [`CapStdExtCommandExt::take_fds`] is called.
124    ///
125    /// Additional (non-systemd) fds can be registered afterwards via
126    /// [`take_fd`](Self::take_fd) or [`take_fd_n`](Self::take_fd_n).
127    ///
128    /// [sd_listen_fds]: https://www.freedesktop.org/software/systemd/man/latest/sd_listen_fds.html
129    pub fn new_systemd_fds<'a>(
130        fds: impl IntoIterator<Item = (Arc<OwnedFd>, SystemdFdName<'a>)>,
131    ) -> Self {
132        let mut this = Self::new();
133        this.register_systemd_fds(fds);
134        this
135    }
136
137    /// Compute the next fd number above everything already taken
138    /// (minimum `SD_LISTEN_FDS_START`).
139    fn next_fd(&self) -> i32 {
140        self.taken
141            .last()
142            .map(|n| n.checked_add(1).expect("fd number overflow"))
143            .unwrap_or(SD_LISTEN_FDS_START)
144    }
145
146    fn insert_fd(&mut self, n: i32) {
147        let inserted = self.taken.insert(n);
148        assert!(inserted, "fd {n} is already assigned");
149    }
150
151    /// Register a file descriptor at the next available fd number.
152    ///
153    /// Returns the fd number that will be assigned in the child.
154    /// Call [`CapStdExtCommandExt::take_fds`] to apply.
155    pub fn take_fd(&mut self, fd: Arc<OwnedFd>) -> i32 {
156        let n = self.next_fd();
157        self.insert_fd(n);
158        self.fds.push((n, fd));
159        n
160    }
161
162    /// Register a file descriptor at a specific fd number.
163    ///
164    /// Call [`CapStdExtCommandExt::take_fds`] to apply.
165    ///
166    /// # Panics
167    ///
168    /// Panics if `target` has already been assigned.
169    pub fn take_fd_n(&mut self, fd: Arc<OwnedFd>, target: i32) -> &mut Self {
170        self.insert_fd(target);
171        self.fds.push((target, fd));
172        self
173    }
174
175    fn register_systemd_fds<'a>(
176        &mut self,
177        fds: impl IntoIterator<Item = (Arc<OwnedFd>, SystemdFdName<'a>)>,
178    ) {
179        let mut n_fds: i32 = 0;
180        let mut names = Vec::new();
181        for (fd, name) in fds {
182            let target = SD_LISTEN_FDS_START
183                .checked_add(n_fds)
184                .expect("too many fds");
185            self.insert_fd(target);
186            self.fds.push((target, fd));
187            names.push(name.as_str());
188            n_fds = n_fds.checked_add(1).expect("too many fds");
189        }
190
191        let fd_count = CString::new(n_fds.to_string()).unwrap();
192        // SAFETY: SystemdFdName guarantees no NUL bytes.
193        let fd_names = CString::new(names.join(":")).unwrap();
194        self.systemd_env = Some((fd_count, fd_names));
195    }
196}
197
198/// Extension trait for [`std::process::Command`].
199///
200/// [`cap_std::fs::Dir`]: https://docs.rs/cap-std/latest/cap_std/fs/struct.Dir.html
201pub trait CapStdExtCommandExt {
202    /// Pass a file descriptor into the target process at a specific fd number.
203    ///
204    /// # Deprecated
205    ///
206    /// Use [`CmdFds`] with [`take_fds`](Self::take_fds) instead. This method
207    /// registers an independent `pre_exec` hook per call, which means
208    /// multiple `take_fd_n` calls (or mixing with `take_fds`) can clobber
209    /// each other when a source fd's raw number equals another mapping's
210    /// target. `take_fds` handles this correctly with atomic fd shuffling.
211    #[deprecated = "Use CmdFds with take_fds() instead"]
212    fn take_fd_n(&mut self, fd: Arc<OwnedFd>, target: i32) -> &mut Self;
213
214    /// Apply a [`CmdFds`] to this command, passing all registered file
215    /// descriptors and (if configured) setting up the systemd
216    /// socket-activation environment.
217    ///
218    /// # Important: Do not use `Command::env()` with systemd fds
219    ///
220    /// When systemd socket-activation environment variables are configured
221    /// (via [`CmdFds::new_systemd_fds`]), they are set using `setenv(3)` in
222    /// a `pre_exec` hook. If `Command::env()` is also called, Rust will
223    /// build an `envp` array that replaces the process environment, causing
224    /// the `LISTEN_*` variables set by the hook to be lost. `Command::envs()`
225    /// is equally problematic. If you need to set additional environment
226    /// variables alongside systemd fds, set them via `pre_exec` + `setenv`
227    /// as well.
228    fn take_fds(&mut self, fds: CmdFds) -> &mut Self;
229
230    /// Use the given directory as the current working directory for the process.
231    fn cwd_dir(&mut self, dir: Dir) -> &mut Self;
232
233    /// On Linux, arrange for [`SIGTERM`] to be delivered to the child if the
234    /// parent *thread* exits. This helps avoid leaking child processes if
235    /// the parent crashes for example.
236    ///
237    /// # IMPORTANT
238    ///
239    /// Due to the semantics of <https://man7.org/linux/man-pages/man2/prctl.2.html> this
240    /// will cause the child to exit when the parent *thread* (not process) exits. In
241    /// particular this can become problematic when used with e.g. a threadpool such
242    /// as Tokio's <https://kobzol.github.io/rust/2025/02/23/tokio-plus-prctl-equals-nasty-bug.html>.
243    #[cfg(any(target_os = "linux", target_os = "android"))]
244    fn lifecycle_bind_to_parent_thread(&mut self) -> &mut Self;
245}
246
247/// Wrapper around `libc::setenv` that checks the return value.
248///
249/// # Safety
250///
251/// Must only be called in a single-threaded context (e.g. after `fork()`
252/// and before `exec()`).
253#[allow(unsafe_code)]
254unsafe fn check_setenv(
255    key: *const std::ffi::c_char,
256    val: *const std::ffi::c_char,
257) -> std::io::Result<()> {
258    // SAFETY: Caller guarantees we are in a single-threaded context
259    // with valid nul-terminated C strings.
260    if unsafe { libc::setenv(key, val, 1) } != 0 {
261        return Err(std::io::Error::last_os_error());
262    }
263    Ok(())
264}
265
266#[allow(unsafe_code)]
267#[allow(deprecated)]
268impl CapStdExtCommandExt for std::process::Command {
269    fn take_fd_n(&mut self, fd: Arc<OwnedFd>, target: i32) -> &mut Self {
270        unsafe {
271            self.pre_exec(move || {
272                let mut target = OwnedFd::from_raw_fd(target);
273                // If the fd is already what we want, then just ensure that
274                // O_CLOEXEC is stripped off.
275                if target.as_raw_fd() == fd.as_raw_fd() {
276                    let fl = rustix::io::fcntl_getfd(&target)?;
277                    rustix::io::fcntl_setfd(&mut target, fl.difference(FdFlags::CLOEXEC))?;
278                } else {
279                    // Otherwise create a dup, which will also default to not setting O_CLOEXEC.
280                    rustix::io::dup2(&*fd, &mut target)?;
281                }
282                // Intentionally leak into the child.
283                let _ = target.into_raw_fd();
284                Ok(())
285            });
286        }
287        self
288    }
289
290    fn take_fds(&mut self, fds: CmdFds) -> &mut Self {
291        // Use a single pre_exec hook that handles all fd shuffling atomically.
292        // This avoids the problem where separate hooks clobber each other when
293        // a source fd number equals a target fd number from a different mapping.
294        unsafe {
295            self.pre_exec(move || {
296                // Dup each source fd to a temporary location above all
297                // targets, so that no dup2() in step 2 can clobber a source.
298                let safe_min = fds
299                    .fds
300                    .iter()
301                    .map(|(t, _)| *t)
302                    .max()
303                    .unwrap_or(0)
304                    .checked_add(1)
305                    .expect("fd number overflow");
306                let mut safe_copies: Vec<(i32, OwnedFd)> = Vec::new();
307                for (target, fd) in &fds.fds {
308                    let copy = rustix::io::fcntl_dupfd_cloexec(fd, safe_min)?;
309                    safe_copies.push((*target, copy));
310                }
311
312                // Place each fd at its target via dup2.
313                // We use raw dup2 to avoid fabricating an OwnedFd for a
314                // target number we don't yet own (which would be unsound
315                // if dup2 failed — the OwnedFd drop would close a wrong fd).
316                for (target, copy) in safe_copies {
317                    // SAFETY: target is a non-negative fd number that dup2
318                    // will atomically (re)open; we don't own it beforehand.
319                    let r = libc::dup2(copy.as_raw_fd(), target);
320                    if r < 0 {
321                        return Err(std::io::Error::last_os_error());
322                    }
323                    // `copy` drops here, closing the temporary fd.
324                }
325
326                // Handle systemd env vars, if configured
327                if let Some((ref fd_count, ref fd_names)) = fds.systemd_env {
328                    let pid = rustix::process::getpid();
329                    let pid_dec = rustix::path::DecInt::new(pid.as_raw_nonzero().get());
330                    // SAFETY: After fork() and before exec(), the child is
331                    // single-threaded, so setenv (which is not thread-safe)
332                    // is safe to call here.
333                    check_setenv(c"LISTEN_PID".as_ptr(), pid_dec.as_c_str().as_ptr())?;
334                    check_setenv(c"LISTEN_FDS".as_ptr(), fd_count.as_ptr())?;
335                    check_setenv(c"LISTEN_FDNAMES".as_ptr(), fd_names.as_ptr())?;
336                }
337                Ok(())
338            });
339        }
340        self
341    }
342
343    fn cwd_dir(&mut self, dir: Dir) -> &mut Self {
344        unsafe {
345            self.pre_exec(move || {
346                rustix::process::fchdir(dir.as_fd())?;
347                Ok(())
348            });
349        }
350        self
351    }
352
353    #[cfg(any(target_os = "linux", target_os = "android"))]
354    fn lifecycle_bind_to_parent_thread(&mut self) -> &mut Self {
355        // SAFETY: This API is safe to call in a forked child.
356        unsafe {
357            self.pre_exec(|| {
358                rustix::process::set_parent_process_death_signal(Some(
359                    rustix::process::Signal::TERM,
360                ))
361                .map_err(Into::into)
362            });
363        }
364        self
365    }
366}
367
#[cfg(all(test, any(target_os = "android", target_os = "linux")))]
mod tests {
    use super::*;
    use std::sync::Arc;

    /// Spawns `readlink` on the passed descriptor's `/proc/self/fd` entry
    /// via the deprecated single-fd API and expects it to succeed.
    #[allow(deprecated)]
    #[test]
    fn test_take_fdn() -> anyhow::Result<()> {
        // Offset 0 targets the descriptor's own number (srcfd == destfd);
        // offset 1 targets the next number up (srcfd != destfd).
        for offset in [0, 1] {
            let tempd = cap_tempfile::TempDir::new(cap_std::ambient_authority())?;
            let tempd_fd = Arc::new(tempd.as_fd().try_clone_to_owned()?);
            let n = tempd_fd.as_raw_fd() + offset;
            #[cfg(any(target_os = "android", target_os = "linux"))]
            let path = format!("/proc/self/fd/{n}");
            // Never compiled in: this module is itself gated on Linux/Android.
            #[cfg(not(any(target_os = "android", target_os = "linux")))]
            let path = format!("/dev/fd/{n}");
            let mut cmd = std::process::Command::new("/usr/bin/env");
            cmd.arg("readlink").arg(path).take_fd_n(tempd_fd, n);
            let st = cmd.status()?;
            assert!(st.success());
        }
        Ok(())
    }
}