cap_std_ext/cmdext.rs
1//! Extensions for [`std::process::Command`] that operate on concepts from cap-std.
2//!
3//! The key APIs here are:
4//!
5//! - File descriptor passing
6//! - Changing to a file-descriptor relative directory
7//! - Systemd socket activation fd passing
8
9use cap_std::fs::Dir;
10use cap_std::io_lifetimes;
11use cap_tempfile::cap_std;
12use io_lifetimes::OwnedFd;
13use rustix::fd::{AsFd, FromRawFd, IntoRawFd};
14use rustix::io::FdFlags;
15use std::collections::BTreeSet;
16use std::ffi::CString;
17use std::os::fd::AsRawFd;
18use std::os::unix::process::CommandExt;
19use std::sync::Arc;
20
/// First file descriptor number used for sockets handed over through
/// systemd socket activation (stdin, stdout and stderr occupy 0, 1, 2).
/// Documented in `sd_listen_fds(3)`.
const SD_LISTEN_FDS_START: i32 = 3;
24
/// A borrowed, already-validated name for a systemd socket-activation
/// file descriptor.
///
/// Such names are joined with `:` to form the `LISTEN_FDNAMES`
/// environment variable. Construction enforces the same rules as
/// systemd's `fdname_is_valid()`: no more than 255 bytes, every byte
/// printable ASCII, and no `:` (the field separator). The empty string
/// is accepted.
///
/// ```
/// use cap_std_ext::cmdext::SystemdFdName;
/// let name = SystemdFdName::new("varlink");
/// ```
#[derive(Debug, Clone, Copy)]
pub struct SystemdFdName<'a>(&'a str);

impl<'a> SystemdFdName<'a> {
    /// Validate `name` and wrap it.
    ///
    /// This is a `const fn`, so names can be checked at compile time when
    /// used to initialise a `const` or `static`.
    ///
    /// # Panics
    ///
    /// Panics if `name` is longer than 255 bytes, or if any byte is
    /// outside the printable ASCII range (control characters, DEL and
    /// non-ASCII bytes are all rejected), or if it contains `:`.
    pub const fn new(name: &'a str) -> Self {
        assert!(
            name.len() <= 255,
            "systemd fd name must be at most 255 characters"
        );
        // Iterators are not available in `const fn`, so peel bytes off the
        // front of the slice one at a time.
        let mut rest = name.as_bytes();
        while let [byte, tail @ ..] = rest {
            // Space (0x20) through tilde (0x7e) is the printable ASCII range.
            let printable = matches!(*byte, b' '..=b'~');
            assert!(
                printable && *byte != b':',
                "systemd fd name must only contain printable ASCII characters except ':'"
            );
            rest = tail;
        }
        Self(name)
    }

    /// Borrow the validated name with its original lifetime.
    pub fn as_str(&self) -> &'a str {
        self.0
    }
}
70
71/// File descriptor allocator for child processes.
72///
73/// Collects fd assignments and optional systemd socket-activation
74/// configuration, then applies them all at once via
75/// [`CapStdExtCommandExt::take_fds`].
76///
77/// - [`new_systemd_fds`](Self::new_systemd_fds) creates an allocator
78/// with systemd socket-activation fds at 3, 4, … (`SD_LISTEN_FDS_START`).
79/// - [`take_fd`](Self::take_fd) auto-assigns the next fd above all
80/// previously assigned ones (minimum 3).
81/// - [`take_fd_n`](Self::take_fd_n) places an fd at an explicit number,
82/// panicking on overlap.
83///
84/// ```no_run
85/// # use std::sync::Arc;
86/// # use cap_std_ext::cmdext::{CmdFds, CapStdExtCommandExt, SystemdFdName};
87/// # let varlink_fd: Arc<rustix::fd::OwnedFd> = todo!();
88/// # let extra_fd: Arc<rustix::fd::OwnedFd> = todo!();
89/// let mut cmd = std::process::Command::new("myservice");
90/// let mut fds = CmdFds::new_systemd_fds([(varlink_fd, SystemdFdName::new("varlink"))]);
91/// let extra_n = fds.take_fd(extra_fd);
92/// cmd.take_fds(fds);
93/// ```
94#[derive(Debug)]
95pub struct CmdFds {
96 taken: BTreeSet<i32>,
97 fds: Vec<(i32, Arc<OwnedFd>)>,
98 /// Pre-built CStrings for the systemd env vars, set by new_systemd_fds.
99 systemd_env: Option<(CString, CString)>,
100}
101
102impl Default for CmdFds {
103 fn default() -> Self {
104 Self::new()
105 }
106}
107
108impl CmdFds {
109 /// Create a new fd allocator.
110 pub fn new() -> Self {
111 Self {
112 taken: BTreeSet::new(),
113 fds: Vec::new(),
114 systemd_env: None,
115 }
116 }
117
118 /// Create a new fd allocator with systemd socket-activation fds.
119 ///
120 /// Each `(fd, name)` pair is assigned a consecutive fd number starting
121 /// at `SD_LISTEN_FDS_START` (3). The `LISTEN_PID`, `LISTEN_FDS`, and
122 /// `LISTEN_FDNAMES` environment variables will be set in the child
123 /// when [`CapStdExtCommandExt::take_fds`] is called.
124 ///
125 /// Additional (non-systemd) fds can be registered afterwards via
126 /// [`take_fd`](Self::take_fd) or [`take_fd_n`](Self::take_fd_n).
127 ///
128 /// [sd_listen_fds]: https://www.freedesktop.org/software/systemd/man/latest/sd_listen_fds.html
129 pub fn new_systemd_fds<'a>(
130 fds: impl IntoIterator<Item = (Arc<OwnedFd>, SystemdFdName<'a>)>,
131 ) -> Self {
132 let mut this = Self::new();
133 this.register_systemd_fds(fds);
134 this
135 }
136
137 /// Compute the next fd number above everything already taken
138 /// (minimum `SD_LISTEN_FDS_START`).
139 fn next_fd(&self) -> i32 {
140 self.taken
141 .last()
142 .map(|n| n.checked_add(1).expect("fd number overflow"))
143 .unwrap_or(SD_LISTEN_FDS_START)
144 }
145
146 fn insert_fd(&mut self, n: i32) {
147 let inserted = self.taken.insert(n);
148 assert!(inserted, "fd {n} is already assigned");
149 }
150
151 /// Register a file descriptor at the next available fd number.
152 ///
153 /// Returns the fd number that will be assigned in the child.
154 /// Call [`CapStdExtCommandExt::take_fds`] to apply.
155 pub fn take_fd(&mut self, fd: Arc<OwnedFd>) -> i32 {
156 let n = self.next_fd();
157 self.insert_fd(n);
158 self.fds.push((n, fd));
159 n
160 }
161
162 /// Register a file descriptor at a specific fd number.
163 ///
164 /// Call [`CapStdExtCommandExt::take_fds`] to apply.
165 ///
166 /// # Panics
167 ///
168 /// Panics if `target` has already been assigned.
169 pub fn take_fd_n(&mut self, fd: Arc<OwnedFd>, target: i32) -> &mut Self {
170 self.insert_fd(target);
171 self.fds.push((target, fd));
172 self
173 }
174
175 fn register_systemd_fds<'a>(
176 &mut self,
177 fds: impl IntoIterator<Item = (Arc<OwnedFd>, SystemdFdName<'a>)>,
178 ) {
179 let mut n_fds: i32 = 0;
180 let mut names = Vec::new();
181 for (fd, name) in fds {
182 let target = SD_LISTEN_FDS_START
183 .checked_add(n_fds)
184 .expect("too many fds");
185 self.insert_fd(target);
186 self.fds.push((target, fd));
187 names.push(name.as_str());
188 n_fds = n_fds.checked_add(1).expect("too many fds");
189 }
190
191 let fd_count = CString::new(n_fds.to_string()).unwrap();
192 // SAFETY: SystemdFdName guarantees no NUL bytes.
193 let fd_names = CString::new(names.join(":")).unwrap();
194 self.systemd_env = Some((fd_count, fd_names));
195 }
196}
197
198/// Extension trait for [`std::process::Command`].
199///
200/// [`cap_std::fs::Dir`]: https://docs.rs/cap-std/latest/cap_std/fs/struct.Dir.html
201pub trait CapStdExtCommandExt {
202 /// Pass a file descriptor into the target process at a specific fd number.
203 ///
204 /// # Deprecated
205 ///
206 /// Use [`CmdFds`] with [`take_fds`](Self::take_fds) instead. This method
207 /// registers an independent `pre_exec` hook per call, which means
208 /// multiple `take_fd_n` calls (or mixing with `take_fds`) can clobber
209 /// each other when a source fd's raw number equals another mapping's
210 /// target. `take_fds` handles this correctly with atomic fd shuffling.
211 #[deprecated = "Use CmdFds with take_fds() instead"]
212 fn take_fd_n(&mut self, fd: Arc<OwnedFd>, target: i32) -> &mut Self;
213
214 /// Apply a [`CmdFds`] to this command, passing all registered file
215 /// descriptors and (if configured) setting up the systemd
216 /// socket-activation environment.
217 ///
218 /// # Important: Do not use `Command::env()` with systemd fds
219 ///
220 /// When systemd socket-activation environment variables are configured
221 /// (via [`CmdFds::new_systemd_fds`]), they are set using `setenv(3)` in
222 /// a `pre_exec` hook. If `Command::env()` is also called, Rust will
223 /// build an `envp` array that replaces the process environment, causing
224 /// the `LISTEN_*` variables set by the hook to be lost. `Command::envs()`
225 /// is equally problematic. If you need to set additional environment
226 /// variables alongside systemd fds, set them via `pre_exec` + `setenv`
227 /// as well.
228 fn take_fds(&mut self, fds: CmdFds) -> &mut Self;
229
230 /// Use the given directory as the current working directory for the process.
231 fn cwd_dir(&mut self, dir: Dir) -> &mut Self;
232
233 /// On Linux, arrange for [`SIGTERM`] to be delivered to the child if the
234 /// parent *thread* exits. This helps avoid leaking child processes if
235 /// the parent crashes for example.
236 ///
237 /// # IMPORTANT
238 ///
239 /// Due to the semantics of <https://man7.org/linux/man-pages/man2/prctl.2.html> this
240 /// will cause the child to exit when the parent *thread* (not process) exits. In
241 /// particular this can become problematic when used with e.g. a threadpool such
242 /// as Tokio's <https://kobzol.github.io/rust/2025/02/23/tokio-plus-prctl-equals-nasty-bug.html>.
243 #[cfg(any(target_os = "linux", target_os = "android"))]
244 fn lifecycle_bind_to_parent_thread(&mut self) -> &mut Self;
245}
246
247/// Wrapper around `libc::setenv` that checks the return value.
248///
249/// # Safety
250///
251/// Must only be called in a single-threaded context (e.g. after `fork()`
252/// and before `exec()`).
253#[allow(unsafe_code)]
254unsafe fn check_setenv(
255 key: *const std::ffi::c_char,
256 val: *const std::ffi::c_char,
257) -> std::io::Result<()> {
258 // SAFETY: Caller guarantees we are in a single-threaded context
259 // with valid nul-terminated C strings.
260 if unsafe { libc::setenv(key, val, 1) } != 0 {
261 return Err(std::io::Error::last_os_error());
262 }
263 Ok(())
264}
265
266#[allow(unsafe_code)]
267#[allow(deprecated)]
268impl CapStdExtCommandExt for std::process::Command {
269 fn take_fd_n(&mut self, fd: Arc<OwnedFd>, target: i32) -> &mut Self {
270 unsafe {
271 self.pre_exec(move || {
272 let mut target = OwnedFd::from_raw_fd(target);
273 // If the fd is already what we want, then just ensure that
274 // O_CLOEXEC is stripped off.
275 if target.as_raw_fd() == fd.as_raw_fd() {
276 let fl = rustix::io::fcntl_getfd(&target)?;
277 rustix::io::fcntl_setfd(&mut target, fl.difference(FdFlags::CLOEXEC))?;
278 } else {
279 // Otherwise create a dup, which will also default to not setting O_CLOEXEC.
280 rustix::io::dup2(&*fd, &mut target)?;
281 }
282 // Intentionally leak into the child.
283 let _ = target.into_raw_fd();
284 Ok(())
285 });
286 }
287 self
288 }
289
290 fn take_fds(&mut self, fds: CmdFds) -> &mut Self {
291 // Use a single pre_exec hook that handles all fd shuffling atomically.
292 // This avoids the problem where separate hooks clobber each other when
293 // a source fd number equals a target fd number from a different mapping.
294 unsafe {
295 self.pre_exec(move || {
296 // Dup each source fd to a temporary location above all
297 // targets, so that no dup2() in step 2 can clobber a source.
298 let safe_min = fds
299 .fds
300 .iter()
301 .map(|(t, _)| *t)
302 .max()
303 .unwrap_or(0)
304 .checked_add(1)
305 .expect("fd number overflow");
306 let mut safe_copies: Vec<(i32, OwnedFd)> = Vec::new();
307 for (target, fd) in &fds.fds {
308 let copy = rustix::io::fcntl_dupfd_cloexec(fd, safe_min)?;
309 safe_copies.push((*target, copy));
310 }
311
312 // Place each fd at its target via dup2.
313 // We use raw dup2 to avoid fabricating an OwnedFd for a
314 // target number we don't yet own (which would be unsound
315 // if dup2 failed — the OwnedFd drop would close a wrong fd).
316 for (target, copy) in safe_copies {
317 // SAFETY: target is a non-negative fd number that dup2
318 // will atomically (re)open; we don't own it beforehand.
319 let r = libc::dup2(copy.as_raw_fd(), target);
320 if r < 0 {
321 return Err(std::io::Error::last_os_error());
322 }
323 // `copy` drops here, closing the temporary fd.
324 }
325
326 // Handle systemd env vars, if configured
327 if let Some((ref fd_count, ref fd_names)) = fds.systemd_env {
328 let pid = rustix::process::getpid();
329 let pid_dec = rustix::path::DecInt::new(pid.as_raw_nonzero().get());
330 // SAFETY: After fork() and before exec(), the child is
331 // single-threaded, so setenv (which is not thread-safe)
332 // is safe to call here.
333 check_setenv(c"LISTEN_PID".as_ptr(), pid_dec.as_c_str().as_ptr())?;
334 check_setenv(c"LISTEN_FDS".as_ptr(), fd_count.as_ptr())?;
335 check_setenv(c"LISTEN_FDNAMES".as_ptr(), fd_names.as_ptr())?;
336 }
337 Ok(())
338 });
339 }
340 self
341 }
342
343 fn cwd_dir(&mut self, dir: Dir) -> &mut Self {
344 unsafe {
345 self.pre_exec(move || {
346 rustix::process::fchdir(dir.as_fd())?;
347 Ok(())
348 });
349 }
350 self
351 }
352
353 #[cfg(any(target_os = "linux", target_os = "android"))]
354 fn lifecycle_bind_to_parent_thread(&mut self) -> &mut Self {
355 // SAFETY: This API is safe to call in a forked child.
356 unsafe {
357 self.pre_exec(|| {
358 rustix::process::set_parent_process_death_signal(Some(
359 rustix::process::Signal::TERM,
360 ))
361 .map_err(Into::into)
362 });
363 }
364 self
365 }
366}
367
#[cfg(all(test, any(target_os = "android", target_os = "linux")))]
mod tests {
    use super::*;
    use std::sync::Arc;

    /// Spawn `readlink` on the passed descriptor, once with the target
    /// number equal to the source fd (`i == 0`) and once with a different
    /// target (`i == 1`), and require the child to succeed both times.
    #[allow(deprecated)]
    #[test]
    fn test_take_fdn() -> anyhow::Result<()> {
        // Pass srcfd == destfd and srcfd != destfd
        for i in 0..=1 {
            let tempd = cap_tempfile::TempDir::new(cap_std::ambient_authority())?;
            let tempd_fd = Arc::new(tempd.as_fd().try_clone_to_owned()?);
            let n = tempd_fd.as_raw_fd() + i;
            // This module is only compiled on Linux and Android, so the
            // procfs path is the only one that can ever be used here.
            let path = format!("/proc/self/fd/{n}");
            let st = std::process::Command::new("/usr/bin/env")
                .arg("readlink")
                .arg(path)
                .take_fd_n(tempd_fd, n)
                .status()?;
            assert!(st.success());
        }
        Ok(())
    }
}
394}