running_process_core/containment.rs
1//! Process containment via OS-level mechanisms.
2//!
3//! `ContainedProcessGroup` ensures all child processes die when the group is
4//! dropped — even on a crash.
5//!
6//! - **Windows**: Uses a Job Object with `JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE`.
7//! Dropping the group closes the handle, and Windows automatically terminates
8//! every process still assigned to the job.
9//! - **Linux**: Uses `setpgid(0, 0)` to place children in a new process group
10//! and `PR_SET_PDEATHSIG(SIGKILL)` via `prctl()` so the kernel kills the
11//! child when the parent thread exits.
12//! **Caveat**: `PR_SET_PDEATHSIG` is reset on `execve` of a set-uid/set-gid
13//! binary and is tied to the *thread* that called `fork`, not the process
14//! leader. If the spawning thread exits before the parent process, children
15//! receive the signal prematurely.
//! - **macOS**: Uses `setpgid(0, 0)` for process grouping. `PR_SET_PDEATHSIG`
//!   is not available, and no substitute (such as polling `getppid()` in the
//!   child) is implemented here. The Drop-based `SIGKILL` to the process group
//!   is the only mechanism, so children are not killed if the parent crashes
//!   without running `Drop`.
20//!
21//! `Containment::Detached` spawns a process that intentionally survives the
22//! group's lifetime (daemon pattern).
23//!
24//! # `RUNNING_PROCESS_ORIGINATOR` environment variable
25//!
26//! When an `originator` is set on a `ContainedProcessGroup`, all spawned child
27//! processes inherit the environment variable `RUNNING_PROCESS_ORIGINATOR` with
28//! the format `TOOL:PID`, where:
29//!
30//! - **TOOL** is the originator name (e.g., `"CLUD"`, `"JUPYTER"`)
31//! - **PID** is the process ID of the parent that spawned the group
32//!
33//! Example value: `RUNNING_PROCESS_ORIGINATOR=CLUD:12345`
34//!
35//! ## Purpose
36//!
37//! This env var enables **cross-process session discovery** after crashes.
38//!
39//! ## Example
40//!
41//! ```no_run
42//! use running_process_core::ContainedProcessGroup;
43//!
44//! let group = ContainedProcessGroup::with_originator("CLUD").unwrap();
45//! let mut cmd = std::process::Command::new("sleep");
46//! cmd.arg("60");
47//! let child = group.spawn(&mut cmd).unwrap();
48//! ```
49
50use std::process::{Child, Command};
51
/// The environment variable name injected into child processes for
/// cross-process session discovery.
///
/// When a group has an originator, every command it spawns gets this variable
/// set to `TOOL:PID` (originator name, then the spawning process's ID).
pub const ORIGINATOR_ENV_VAR: &str = "RUNNING_PROCESS_ORIGINATOR";
55
/// How a spawned process relates to the lifetime of its owning group.
///
/// The default policy is [`Containment::Contained`].
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum Containment {
    /// Tie the process to the group: dropping the group kills it, and on
    /// Linux it is also killed when the parent thread dies.
    #[default]
    Contained,
    /// Let the process outlive the group (daemon-style): dropping the group
    /// leaves it running.
    Detached,
}
67
/// A group of processes that are killed together when the group is dropped.
///
/// On Windows this wraps a Job Object; on Unix it tracks a process-group ID
/// and sends `SIGKILL` to the group on drop.
pub struct ContainedProcessGroup {
    /// Optional tool name. When set, spawned children receive the
    /// `RUNNING_PROCESS_ORIGINATOR` variable formatted as `TOOL:PID`.
    originator: Option<String>,

    /// Job Object handle that contained children are assigned to.
    #[cfg(windows)]
    job: super::WindowsJobHandle,

    /// Process-group ID shared by contained children. `None` until the first
    /// contained child is spawned, after which it holds that child's PID.
    #[cfg(unix)]
    pgid: std::sync::Mutex<Option<i32>>,

    /// PIDs of every contained child spawned so far; used on drop to kill and
    /// reap children individually.
    #[cfg(unix)]
    child_pids: std::sync::Mutex<Vec<u32>>,
}
84
/// A handle to a process spawned inside a `ContainedProcessGroup`.
pub struct ContainedChild {
    /// The standard-library handle to the spawned process.
    pub child: Child,
    /// The containment policy the process was spawned with.
    pub containment: Containment,
}
90
/// Builds the originator env var value for `tool`.
///
/// The result is `tool`, a colon, and the ID of the current process, i.e.
/// `TOOL:PID`. `tool` is used verbatim; no escaping or validation is applied.
fn format_originator_value(tool: &str) -> String {
    let pid = std::process::id().to_string();
    [tool, pid.as_str()].join(":")
}
95
96impl ContainedProcessGroup {
97 /// Create a new process group without an originator.
98 pub fn new() -> Result<Self, std::io::Error> {
99 Self::build(None)
100 }
101
102 /// Create a new process group with an originator name.
103 pub fn with_originator(originator: &str) -> Result<Self, std::io::Error> {
104 Self::build(Some(originator.to_string()))
105 }
106
107 fn build(originator: Option<String>) -> Result<Self, std::io::Error> {
108 #[cfg(windows)]
109 {
110 Self::new_windows(originator)
111 }
112 #[cfg(unix)]
113 {
114 Ok(Self {
115 originator,
116 pgid: std::sync::Mutex::new(None),
117 child_pids: std::sync::Mutex::new(Vec::new()),
118 })
119 }
120 }
121
122 /// Returns the originator name, if set.
123 pub fn originator(&self) -> Option<&str> {
124 self.originator.as_deref()
125 }
126
127 /// Returns the full originator env var value (`TOOL:PID`), if set.
128 pub fn originator_value(&self) -> Option<String> {
129 self.originator.as_ref().map(|o| format_originator_value(o))
130 }
131
132 fn inject_originator_env(&self, command: &mut Command) {
133 if let Some(ref originator) = self.originator {
134 command.env(ORIGINATOR_ENV_VAR, format_originator_value(originator));
135 }
136 }
137
138 /// Spawn a contained child process. The child will be killed when this
139 /// group is dropped.
140 pub fn spawn(&self, command: &mut Command) -> Result<ContainedChild, std::io::Error> {
141 self.spawn_with_containment(command, Containment::Contained)
142 }
143
144 /// Spawn a detached child process. The child will survive this group
145 /// being dropped.
146 pub fn spawn_detached(&self, command: &mut Command) -> Result<ContainedChild, std::io::Error> {
147 self.spawn_with_containment(command, Containment::Detached)
148 }
149
150 /// Spawn a child process with the given containment policy.
151 pub fn spawn_with_containment(
152 &self,
153 command: &mut Command,
154 containment: Containment,
155 ) -> Result<ContainedChild, std::io::Error> {
156 self.inject_originator_env(command);
157
158 #[cfg(windows)]
159 {
160 self.spawn_windows(command, containment)
161 }
162 #[cfg(unix)]
163 {
164 self.spawn_unix(command, containment)
165 }
166 }
167}
168
169// ── Windows implementation ──────────────────────────────────────────────────
170
#[cfg(windows)]
impl ContainedProcessGroup {
    /// Creates the group's Job Object and configures it with
    /// `JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE | JOB_OBJECT_LIMIT_BREAKAWAY_OK`.
    ///
    /// # Errors
    ///
    /// Returns the last OS error if the job cannot be created, or if setting
    /// its limit information fails (the job handle is closed before
    /// returning in that case).
    fn new_windows(originator: Option<String>) -> Result<Self, std::io::Error> {
        use std::mem::zeroed;
        use winapi::shared::minwindef::FALSE;
        use winapi::um::handleapi::INVALID_HANDLE_VALUE;
        use winapi::um::jobapi2::{CreateJobObjectW, SetInformationJobObject};
        use winapi::um::winnt::{
            JobObjectExtendedLimitInformation, JOBOBJECT_EXTENDED_LIMIT_INFORMATION,
            JOB_OBJECT_LIMIT_BREAKAWAY_OK, JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE,
        };

        // SAFETY: both arguments are permitted to be null (default security
        // attributes, unnamed job); the returned handle is checked below.
        let job = unsafe { CreateJobObjectW(std::ptr::null_mut(), std::ptr::null()) };
        if job.is_null() || job == INVALID_HANDLE_VALUE {
            return Err(std::io::Error::last_os_error());
        }

        // SAFETY: the struct is plain C data for which all-zero bytes are a
        // valid value.
        let mut info: JOBOBJECT_EXTENDED_LIMIT_INFORMATION = unsafe { zeroed() };
        info.BasicLimitInformation.LimitFlags =
            JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE | JOB_OBJECT_LIMIT_BREAKAWAY_OK;
        // SAFETY: `job` is a valid job handle, and the pointer/length pair
        // describes `info`, which outlives the call.
        let ok = unsafe {
            SetInformationJobObject(
                job,
                JobObjectExtendedLimitInformation,
                (&mut info as *mut JOBOBJECT_EXTENDED_LIMIT_INFORMATION).cast(),
                std::mem::size_of::<JOBOBJECT_EXTENDED_LIMIT_INFORMATION>() as u32,
            )
        };
        if ok == FALSE {
            // Capture the error before CloseHandle can overwrite it.
            let err = std::io::Error::last_os_error();
            // SAFETY: `job` is a valid handle we own and have not yet wrapped.
            unsafe { winapi::um::handleapi::CloseHandle(job) };
            return Err(err);
        }

        Ok(Self {
            originator,
            job: super::WindowsJobHandle(job as usize),
        })
    }

    /// Spawns `command` and, for contained children, assigns the new process
    /// to the group's Job Object.
    ///
    /// # Errors
    ///
    /// Returns the spawn error, or the OS error from
    /// `AssignProcessToJobObject`. In the latter case the freshly spawned
    /// child is killed and reaped before returning, so a failed contained
    /// spawn never leaves an uncontained process behind.
    fn spawn_windows(
        &self,
        command: &mut Command,
        containment: Containment,
    ) -> Result<ContainedChild, std::io::Error> {
        use winapi::shared::minwindef::FALSE;
        use winapi::um::jobapi2::AssignProcessToJobObject;

        match containment {
            Containment::Contained => {
                // Spawn the child, then assign it to our Job Object.
                // NOTE(review): the child runs between spawn and assignment,
                // so processes it starts in that window may not be in the job.
                let mut child = command.spawn()?;
                let handle = {
                    use std::os::windows::io::AsRawHandle;
                    child.as_raw_handle()
                };
                // SAFETY: `self.job` holds the job handle created in
                // `new_windows`, and `handle` is the live process handle
                // owned by `child`.
                let ok = unsafe {
                    AssignProcessToJobObject(
                        self.job.0 as winapi::shared::ntdef::HANDLE,
                        handle.cast(),
                    )
                };
                if ok == FALSE {
                    // Capture the error first: kill/wait may overwrite it.
                    let err = std::io::Error::last_os_error();
                    // The caller only receives `Err` and has no handle to
                    // the child, so terminate it rather than leak a process
                    // that is outside the job. Failures here are ignored
                    // because the child may already have exited.
                    let _ = child.kill();
                    let _ = child.wait();
                    return Err(err);
                }
                Ok(ContainedChild { child, containment })
            }
            Containment::Detached => {
                // Detached: simply do NOT assign the child to the Job
                // Object. The child will survive when the job handle is
                // closed (and contained siblings are killed).
                //
                // NOTE: `CREATE_BREAKAWAY_FROM_JOB` is only useful when
                // the *spawning* process is already inside a job and wants
                // to launch a child outside it. Here, our spawning process
                // is not in the job, so we just skip assignment.
                let child = command.spawn()?;
                Ok(ContainedChild { child, containment })
            }
        }
    }
}
253
254// ── Unix implementation ─────────────────────────────────────────────────────
255
256#[cfg(unix)]
257impl ContainedProcessGroup {
258 fn spawn_unix(
259 &self,
260 command: &mut Command,
261 containment: Containment,
262 ) -> Result<ContainedChild, std::io::Error> {
263 use std::os::unix::process::CommandExt;
264
265 match containment {
266 Containment::Contained => {
267 let pgid_lock = self.pgid.lock().expect("pgid mutex poisoned");
268 let target_pgid = *pgid_lock;
269 drop(pgid_lock);
270
271 unsafe {
272 command.pre_exec(move || {
273 // Place child into the group's process group, or create
274 // a new one if this is the first child.
275 let pgid = target_pgid.unwrap_or(0);
276 if libc::setpgid(0, pgid) == -1 {
277 return Err(std::io::Error::last_os_error());
278 }
279
280 // Linux-only: ask the kernel to send SIGKILL to this
281 // child when the parent thread exits.
282 // NOTE: PR_SET_PDEATHSIG is tied to the calling
283 // *thread*, not the process. If the thread that spawned
284 // this child exits, the child receives the signal even
285 // if the parent process is still alive.
286 #[cfg(target_os = "linux")]
287 {
288 if libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGKILL) == -1 {
289 return Err(std::io::Error::last_os_error());
290 }
291 // Re-check that the parent hasn't already died
292 // between fork() and prctl().
293 if libc::getppid() == 1 {
294 // Parent already exited; init adopted us.
295 libc::_exit(1);
296 }
297 }
298
299 Ok(())
300 });
301 }
302
303 let child = command.spawn()?;
304 let pid = child.id();
305
306 // Record the process group ID.
307 let mut pgid_lock = self.pgid.lock().expect("pgid mutex poisoned");
308 let group_pgid = if let Some(existing) = *pgid_lock {
309 existing
310 } else {
311 // First child becomes the process group leader.
312 *pgid_lock = Some(pid as i32);
313 pid as i32
314 };
315 drop(pgid_lock);
316
317 // Parent-side setpgid: the standard double-setpgid pattern.
318 // Both parent and child call setpgid so the group assignment
319 // is guaranteed regardless of scheduling order. EACCES is
320 // expected (child already exec'd) and harmless.
321 unsafe {
322 libc::setpgid(pid as i32, group_pgid);
323 }
324
325 self.child_pids
326 .lock()
327 .expect("child_pids mutex poisoned")
328 .push(pid);
329
330 Ok(ContainedChild { child, containment })
331 }
332 Containment::Detached => {
333 unsafe {
334 command.pre_exec(|| {
335 // Create a new session so the child is fully detached.
336 if libc::setsid() == -1 {
337 return Err(std::io::Error::last_os_error());
338 }
339 Ok(())
340 });
341 }
342 let child = command.spawn()?;
343 Ok(ContainedChild { child, containment })
344 }
345 }
346 }
347}
348
349#[cfg(unix)]
350impl Drop for ContainedProcessGroup {
351 fn drop(&mut self) {
352 let pgid = self.pgid.lock().expect("pgid mutex poisoned");
353 if let Some(pgid) = *pgid {
354 // Send SIGKILL to the entire process group. Negative PID targets
355 // the group. Errors are ignored (processes may have already exited).
356 unsafe {
357 libc::killpg(pgid, libc::SIGKILL);
358 }
359 }
360 drop(pgid);
361
362 // Fallback: kill each tracked PID individually, in case any child
363 // failed to join the process group (e.g. race between fork and exec).
364 let pids = self.child_pids.lock().expect("child_pids mutex poisoned");
365 for &pid in pids.iter() {
366 unsafe {
367 libc::kill(pid as i32, libc::SIGKILL);
368 }
369 }
370
371 // Reap zombie children. After SIGKILL, child processes remain as
372 // zombies in the process table until waitpid() is called. Without
373 // reaping, kill(pid, 0) still reports them as alive and they consume
374 // a slot in the process table. SIGKILL is unblockable so blocking
375 // waitpid returns essentially immediately. If the PID is not our
376 // child (or was already reaped), waitpid returns -1/ECHILD which we
377 // safely ignore.
378 for &pid in pids.iter() {
379 unsafe {
380 libc::waitpid(pid as i32, std::ptr::null_mut(), 0);
381 }
382 }
383 }
384}
385
386// Windows: Job Object handle is closed by WindowsJobHandle::drop, which
387// triggers JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE automatically.
388
// ── Tests ───────────────────────────────────────────────────────────────────
390
#[cfg(test)]
mod tests {
    use super::*;

    /// `Default` must select the contained policy.
    #[test]
    fn containment_default_is_contained() {
        let policy = Containment::default();
        assert_eq!(policy, Containment::Contained);
    }

    /// The value stays usable after being assigned elsewhere, i.e. it is `Copy`.
    #[test]
    fn containment_clone_and_copy() {
        let original = Containment::Contained;
        let duplicate = original;
        assert_eq!(original, duplicate);
    }

    /// `Debug` output is the bare variant name.
    #[test]
    fn containment_debug_format() {
        let cases = [
            (Containment::Contained, "Contained"),
            (Containment::Detached, "Detached"),
        ];
        for (policy, expected) in cases {
            assert_eq!(format!("{:?}", policy), expected);
        }
    }

    /// A group without an originator can be constructed.
    #[test]
    fn contained_process_group_creates_successfully() {
        assert!(ContainedProcessGroup::new().is_ok());
    }

    /// A group with an originator can be constructed and reports its name.
    #[test]
    fn with_originator_creates_successfully() {
        let created = ContainedProcessGroup::with_originator("CLUD");
        assert!(created.is_ok());
        let group = created.unwrap();
        assert_eq!(group.originator(), Some("CLUD"));
    }

    /// The env var value is the originator name, a colon, and our PID.
    #[test]
    fn originator_value_format() {
        let group = ContainedProcessGroup::with_originator("CLUD").unwrap();
        let expected = format!("CLUD:{}", std::process::id());
        assert_eq!(group.originator_value().unwrap(), expected);
    }

    /// Without an originator both accessors report `None`.
    #[test]
    fn no_originator_returns_none() {
        let group = ContainedProcessGroup::new().unwrap();
        assert!(group.originator().is_none());
        assert!(group.originator_value().is_none());
    }

    /// The formatted value splits into exactly the tool name and our PID.
    #[test]
    fn format_originator_value_correct() {
        let value = format_originator_value("JUPYTER");
        let pid = std::process::id().to_string();
        let mut pieces = value.splitn(2, ':');
        assert_eq!(pieces.next(), Some("JUPYTER"));
        assert_eq!(pieces.next(), Some(pid.as_str()));
        assert_eq!(pieces.next(), None);
    }
}