kaish_kernel/pidfd.rs
1//! Process file descriptors (Linux pidfd) for race-free child kill.
2//!
3//! `kill(pid, sig)` sends a signal to whatever process currently has that PID.
4//! After the kernel reaps the original child, the PID may be reused by an
5//! unrelated process — so a delayed `kill()` from our cancel/timeout path can
6//! signal the wrong target. Linux 5.3+ provides `pidfd_open` to obtain a
7//! generation-bound handle and `pidfd_send_signal` to deliver signals to it,
8//! eliminating the race for the direct child.
9//!
10//! Process-group kills (`killpg`) have no pidfd-equivalent; grandchildren
11//! still go through PID-based delivery and retain the (small) reuse window.
12//!
13//! On non-Linux unix targets, [`KillTarget`] degrades gracefully to PID-based
14//! `kill()` so the call sites stay portable.
15
16#![cfg(all(unix, feature = "native"))]
17
18#[cfg(target_os = "linux")]
19use std::os::fd::{AsRawFd, FromRawFd, OwnedFd};
20
21use nix::sys::signal::Signal;
22use nix::unistd::Pid;
23
24/// A `pidfd` handle (Linux ≥ 5.3). Internally opaque; use [`KillTarget`].
25#[cfg(target_os = "linux")]
26struct Pidfd(OwnedFd);
27
28#[cfg(target_os = "linux")]
29impl Pidfd {
30 /// Open a pidfd for `pid`. Returns `Err` on pre-5.3 kernels (ENOSYS),
31 /// for permission errors (EPERM), or if the process is already gone
32 /// (ESRCH).
33 fn open(pid: u32) -> std::io::Result<Self> {
34 // SAFETY: pidfd_open(pid, flags) is a well-defined syscall that
35 // returns a new fd or -1/errno. We pass flags = 0 (no PIDFD_NONBLOCK)
36 // and a positive PID. The returned fd is exclusively ours.
37 // libc::syscall is variadic — pass each arg as c_long so the C
38 // variadic ABI promotes consistently across architectures.
39 #[allow(unsafe_code)]
40 let res = unsafe {
41 nix::libc::syscall(
42 nix::libc::SYS_pidfd_open,
43 pid as nix::libc::c_long,
44 0 as nix::libc::c_long,
45 )
46 };
47 if res < 0 {
48 return Err(std::io::Error::last_os_error());
49 }
50 // FDs fit in c_int (~10^6 max on Linux), so the i32 cast is safe.
51 // SAFETY: a non-negative syscall return is a fresh fd this process
52 // owns. From_raw_fd takes ownership; OwnedFd will close on drop.
53 #[allow(unsafe_code)]
54 let fd = unsafe { OwnedFd::from_raw_fd(res as i32) };
55 Ok(Self(fd))
56 }
57
58 fn send_signal(&self, sig: Signal) -> std::io::Result<()> {
59 // SAFETY: pidfd_send_signal(fd, sig, info=NULL, flags=0) is a
60 // well-defined syscall; passing NULL info synthesises a default
61 // siginfo_t. The fd outlives the call (held in &self). All
62 // variadic args passed as c_long for portable ABI.
63 #[allow(unsafe_code)]
64 let res = unsafe {
65 nix::libc::syscall(
66 nix::libc::SYS_pidfd_send_signal,
67 self.0.as_raw_fd() as nix::libc::c_long,
68 sig as nix::libc::c_int as nix::libc::c_long,
69 std::ptr::null::<nix::libc::siginfo_t>(),
70 0 as nix::libc::c_long,
71 )
72 };
73 if res < 0 {
74 Err(std::io::Error::last_os_error())
75 } else {
76 Ok(())
77 }
78 }
79}
80
81/// A bundle of identifiers for killing a child process.
82///
83/// On Linux, holds a pidfd for race-free direct-child signalling. On other
84/// unix targets, just the raw PID — kills go through `kill(pid, sig)`.
85/// Process-group signals always use the PID as PGID (we set `pgid = pid` in
86/// the child via `setpgid` at spawn) and accept the killpg PID-reuse window.
87pub struct KillTarget {
88 pid: Pid,
89 #[cfg(target_os = "linux")]
90 pidfd: Option<Pidfd>,
91}
92
93impl KillTarget {
94 /// Build a kill target from a freshly spawned `tokio::process::Child`.
95 ///
96 /// Returns `None` if the child has no PID (already reaped). On Linux,
97 /// attempts to open a pidfd; if that fails (old kernel, permission
98 /// denied), the target falls back to PID-based kill — the caller still
99 /// gets a usable target, just without the reuse-race protection.
100 pub fn from_child(child: &tokio::process::Child) -> Option<Self> {
101 let pid_raw = child.id()?;
102 Some(Self {
103 pid: Pid::from_raw(pid_raw as i32),
104 #[cfg(target_os = "linux")]
105 pidfd: Pidfd::open(pid_raw).ok(),
106 })
107 }
108
109 /// Build a target from a raw PID (the JC watcher uses this — it can't
110 /// borrow the original target across the spawn boundary, so it re-opens).
111 /// On Linux, a fresh pidfd is opened; if it fails (e.g. the child was
112 /// reaped between spawn and reopen, leaving the PID free for reuse),
113 /// returns a target that falls back to PID-based kill — best-effort.
114 pub fn from_pid(pid: Pid) -> Self {
115 Self {
116 pid,
117 #[cfg(target_os = "linux")]
118 pidfd: {
119 let raw = pid.as_raw();
120 if raw > 0 {
121 Pidfd::open(raw as u32).ok()
122 } else {
123 None
124 }
125 },
126 }
127 }
128
129 /// Send `sig` to the **direct child**. On Linux uses pidfd if available
130 /// (immune to PID reuse), otherwise falls back to `kill(pid, sig)`.
131 pub fn signal(&self, sig: Signal) {
132 #[cfg(target_os = "linux")]
133 if let Some(pfd) = &self.pidfd {
134 let _ = pfd.send_signal(sig);
135 return;
136 }
137 let _ = nix::sys::signal::kill(self.pid, sig);
138 }
139
140 /// Send `sig` to the child's **process group** (PGID == child PID since
141 /// we `setpgid(0, 0)` in the child's `pre_exec`). No pidfd-equivalent
142 /// for PGIDs; this retains the small reuse window for grandchildren.
143 pub fn signal_pg(&self, sig: Signal) {
144 let _ = nix::sys::signal::killpg(self.pid, sig);
145 }
146
147 /// Raw PID, for paths that still need it (logging, JC waitpid, etc.).
148 pub fn pid(&self) -> Pid {
149 self.pid
150 }
151}