Skip to main content

kaish_kernel/
pidfd.rs

1//! Process file descriptors (Linux pidfd) for race-free child kill.
2//!
3//! `kill(pid, sig)` sends a signal to whatever process currently has that PID.
4//! After the kernel reaps the original child, the PID may be reused by an
5//! unrelated process — so a delayed `kill()` from our cancel/timeout path can
6//! signal the wrong target. Linux 5.3+ provides `pidfd_open` to obtain a
7//! generation-bound handle and `pidfd_send_signal` to deliver signals to it,
8//! eliminating the race for the direct child.
9//!
10//! Process-group kills (`killpg`) have no pidfd-equivalent; grandchildren
11//! still go through PID-based delivery and retain the (small) reuse window.
12//!
13//! On non-Linux unix targets, [`KillTarget`] degrades gracefully to PID-based
14//! `kill()` so the call sites stay portable.
15
16#![cfg(all(unix, feature = "native"))]
17
18#[cfg(target_os = "linux")]
19use std::os::fd::{AsRawFd, FromRawFd, OwnedFd};
20
21use nix::sys::signal::Signal;
22use nix::unistd::Pid;
23
/// Owned Linux process file descriptor (`pidfd`, kernel ≥ 5.3).
///
/// Private to this module: the wrapped [`OwnedFd`] closes the descriptor when
/// the value is dropped. Callers elsewhere should go through [`KillTarget`]
/// rather than touching this type directly.
#[cfg(target_os = "linux")]
struct Pidfd(OwnedFd);
27
28#[cfg(target_os = "linux")]
29impl Pidfd {
30    /// Open a pidfd for `pid`. Returns `Err` on pre-5.3 kernels (ENOSYS),
31    /// for permission errors (EPERM), or if the process is already gone
32    /// (ESRCH).
33    fn open(pid: u32) -> std::io::Result<Self> {
34        // SAFETY: pidfd_open(pid, flags) is a well-defined syscall that
35        // returns a new fd or -1/errno. We pass flags = 0 (no PIDFD_NONBLOCK)
36        // and a positive PID. The returned fd is exclusively ours.
37        // libc::syscall is variadic — pass each arg as c_long so the C
38        // variadic ABI promotes consistently across architectures.
39        #[allow(unsafe_code)]
40        let res = unsafe {
41            nix::libc::syscall(
42                nix::libc::SYS_pidfd_open,
43                pid as nix::libc::c_long,
44                0 as nix::libc::c_long,
45            )
46        };
47        if res < 0 {
48            return Err(std::io::Error::last_os_error());
49        }
50        // FDs fit in c_int (~10^6 max on Linux), so the i32 cast is safe.
51        // SAFETY: a non-negative syscall return is a fresh fd this process
52        // owns. From_raw_fd takes ownership; OwnedFd will close on drop.
53        #[allow(unsafe_code)]
54        let fd = unsafe { OwnedFd::from_raw_fd(res as i32) };
55        Ok(Self(fd))
56    }
57
58    fn send_signal(&self, sig: Signal) -> std::io::Result<()> {
59        // SAFETY: pidfd_send_signal(fd, sig, info=NULL, flags=0) is a
60        // well-defined syscall; passing NULL info synthesises a default
61        // siginfo_t. The fd outlives the call (held in &self). All
62        // variadic args passed as c_long for portable ABI.
63        #[allow(unsafe_code)]
64        let res = unsafe {
65            nix::libc::syscall(
66                nix::libc::SYS_pidfd_send_signal,
67                self.0.as_raw_fd() as nix::libc::c_long,
68                sig as nix::libc::c_int as nix::libc::c_long,
69                std::ptr::null::<nix::libc::siginfo_t>(),
70                0 as nix::libc::c_long,
71            )
72        };
73        if res < 0 {
74            Err(std::io::Error::last_os_error())
75        } else {
76            Ok(())
77        }
78    }
79}
80
81/// A bundle of identifiers for killing a child process.
82///
83/// On Linux, holds a pidfd for race-free direct-child signalling. On other
84/// unix targets, just the raw PID — kills go through `kill(pid, sig)`.
85/// Process-group signals always use the PID as PGID (we set `pgid = pid` in
86/// the child via `setpgid` at spawn) and accept the killpg PID-reuse window.
87pub struct KillTarget {
88    pid: Pid,
89    #[cfg(target_os = "linux")]
90    pidfd: Option<Pidfd>,
91}
92
93impl KillTarget {
94    /// Build a kill target from a freshly spawned `tokio::process::Child`.
95    ///
96    /// Returns `None` if the child has no PID (already reaped). On Linux,
97    /// attempts to open a pidfd; if that fails (old kernel, permission
98    /// denied), the target falls back to PID-based kill — the caller still
99    /// gets a usable target, just without the reuse-race protection.
100    pub fn from_child(child: &tokio::process::Child) -> Option<Self> {
101        let pid_raw = child.id()?;
102        Some(Self {
103            pid: Pid::from_raw(pid_raw as i32),
104            #[cfg(target_os = "linux")]
105            pidfd: Pidfd::open(pid_raw).ok(),
106        })
107    }
108
109    /// Build a target from a raw PID (the JC watcher uses this — it can't
110    /// borrow the original target across the spawn boundary, so it re-opens).
111    /// On Linux, a fresh pidfd is opened; if it fails (e.g. the child was
112    /// reaped between spawn and reopen, leaving the PID free for reuse),
113    /// returns a target that falls back to PID-based kill — best-effort.
114    pub fn from_pid(pid: Pid) -> Self {
115        Self {
116            pid,
117            #[cfg(target_os = "linux")]
118            pidfd: {
119                let raw = pid.as_raw();
120                if raw > 0 {
121                    Pidfd::open(raw as u32).ok()
122                } else {
123                    None
124                }
125            },
126        }
127    }
128
129    /// Send `sig` to the **direct child**. On Linux uses pidfd if available
130    /// (immune to PID reuse), otherwise falls back to `kill(pid, sig)`.
131    pub fn signal(&self, sig: Signal) {
132        #[cfg(target_os = "linux")]
133        if let Some(pfd) = &self.pidfd {
134            let _ = pfd.send_signal(sig);
135            return;
136        }
137        let _ = nix::sys::signal::kill(self.pid, sig);
138    }
139
140    /// Send `sig` to the child's **process group** (PGID == child PID since
141    /// we `setpgid(0, 0)` in the child's `pre_exec`). No pidfd-equivalent
142    /// for PGIDs; this retains the small reuse window for grandchildren.
143    pub fn signal_pg(&self, sig: Signal) {
144        let _ = nix::sys::signal::killpg(self.pid, sig);
145    }
146
147    /// Raw PID, for paths that still need it (logging, JC waitpid, etc.).
148    pub fn pid(&self) -> Pid {
149        self.pid
150    }
151}