Skip to main content

running_process_core/
containment.rs

1//! Process containment via OS-level mechanisms.
2//!
3//! `ContainedProcessGroup` ensures all child processes die when the group is
4//! dropped — even on a crash.
5//!
6//! - **Windows**: Uses a Job Object with `JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE`.
7//!   Dropping the group closes the handle, and Windows automatically terminates
8//!   every process still assigned to the job.
9//! - **Linux**: Uses `setpgid(0, 0)` to place children in a new process group
10//!   and `PR_SET_PDEATHSIG(SIGKILL)` via `prctl()` so the kernel kills the
11//!   child when the parent thread exits.
12//!   **Caveat**: `PR_SET_PDEATHSIG` is reset on `execve` of a set-uid/set-gid
13//!   binary and is tied to the *thread* that called `fork`, not the process
14//!   leader. If the spawning thread exits before the parent process, children
15//!   receive the signal prematurely.
16//! - **macOS**: Uses `setpgid(0, 0)` for process grouping. `PR_SET_PDEATHSIG`
17//!   is not available; parent-death notification is best-effort via polling
18//!   `getppid()` in the child (not implemented here — the Drop-based SIGKILL
19//!   to the process group is the primary mechanism).
20//!
21//! `Containment::Detached` spawns a process that intentionally survives the
22//! group's lifetime (daemon pattern).
23//!
24//! # `RUNNING_PROCESS_ORIGINATOR` environment variable
25//!
26//! When an `originator` is set on a `ContainedProcessGroup`, all spawned child
27//! processes inherit the environment variable `RUNNING_PROCESS_ORIGINATOR` with
28//! the format `TOOL:PID`, where:
29//!
30//! - **TOOL** is the originator name (e.g., `"CLUD"`, `"JUPYTER"`)
31//! - **PID** is the process ID of the parent that spawned the group
32//!
33//! Example value: `RUNNING_PROCESS_ORIGINATOR=CLUD:12345`
34//!
35//! ## Purpose
36//!
37//! This env var enables **cross-process session discovery** after crashes.
38//!
39//! ## Example
40//!
41//! ```no_run
42//! use running_process_core::ContainedProcessGroup;
43//!
44//! let group = ContainedProcessGroup::with_originator("CLUD").unwrap();
45//! let mut cmd = std::process::Command::new("sleep");
46//! cmd.arg("60");
47//! let child = group.spawn(&mut cmd).unwrap();
48//! ```
49
50use std::process::{Child, Command};
51
/// Name of the environment variable set on spawned children when the group
/// has an originator.
///
/// The value has the form `TOOL:PID` (originator name, then the ID of the
/// spawning process) and exists for cross-process session discovery.
pub const ORIGINATOR_ENV_VAR: &str = "RUNNING_PROCESS_ORIGINATOR";
55
/// How a spawned process relates to the lifetime of its group.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum Containment {
    /// Tied to the group: the process is killed when the group is dropped
    /// and, on Linux, also when the parent thread dies. This is the default
    /// policy.
    #[default]
    Contained,
    /// Independent of the group: the process keeps running after the group
    /// is dropped, which suits daemon-style processes.
    Detached,
}
67
/// A group of processes that are killed together when the group is dropped.
///
/// On Windows this wraps a Job Object; on Unix it tracks a process-group ID
/// and sends `SIGKILL` to the group on drop.
///
/// Only children spawned with `Containment::Contained` are tracked; detached
/// children are spawned through the group but not recorded in it.
pub struct ContainedProcessGroup {
    /// Optional tool name. When set, every spawned child receives it,
    /// combined with this process's PID, through the
    /// `RUNNING_PROCESS_ORIGINATOR` environment variable.
    originator: Option<String>,

    /// Job Object handle that contained children are assigned to.
    #[cfg(windows)]
    job: super::WindowsJobHandle,

    /// Process-group ID shared by contained children. `None` until the first
    /// contained child is spawned; that child's PID becomes the group ID.
    #[cfg(unix)]
    pgid: std::sync::Mutex<Option<i32>>,

    /// PIDs of the contained children spawned so far. On drop each one is
    /// signalled and reaped individually as a fallback to the group signal.
    #[cfg(unix)]
    child_pids: std::sync::Mutex<Vec<u32>>,
}
84
/// A handle to a process spawned inside a `ContainedProcessGroup`.
///
/// Returned by the group's `spawn*` methods; it pairs the standard-library
/// child handle with the policy the process was spawned under.
pub struct ContainedChild {
    /// The standard-library handle for the spawned process.
    pub child: Child,
    /// The containment policy that was requested when spawning `child`.
    pub containment: Containment,
}
90
/// Build the `TOOL:PID` string stored in the originator environment variable.
///
/// `tool` is used verbatim as the name part; the PID part is the ID of the
/// process calling this function.
fn format_originator_value(tool: &str) -> String {
    let pid = std::process::id();
    format!("{tool}:{pid}")
}
95
96impl ContainedProcessGroup {
97    /// Create a new process group without an originator.
98    pub fn new() -> Result<Self, std::io::Error> {
99        Self::build(None)
100    }
101
102    /// Create a new process group with an originator name.
103    pub fn with_originator(originator: &str) -> Result<Self, std::io::Error> {
104        Self::build(Some(originator.to_string()))
105    }
106
107    fn build(originator: Option<String>) -> Result<Self, std::io::Error> {
108        #[cfg(windows)]
109        {
110            Self::new_windows(originator)
111        }
112        #[cfg(unix)]
113        {
114            Ok(Self {
115                originator,
116                pgid: std::sync::Mutex::new(None),
117                child_pids: std::sync::Mutex::new(Vec::new()),
118            })
119        }
120    }
121
122    /// Returns the originator name, if set.
123    pub fn originator(&self) -> Option<&str> {
124        self.originator.as_deref()
125    }
126
127    /// Returns the full originator env var value (`TOOL:PID`), if set.
128    pub fn originator_value(&self) -> Option<String> {
129        self.originator.as_ref().map(|o| format_originator_value(o))
130    }
131
132    fn inject_originator_env(&self, command: &mut Command) {
133        if let Some(ref originator) = self.originator {
134            command.env(ORIGINATOR_ENV_VAR, format_originator_value(originator));
135        }
136    }
137
138    /// Spawn a contained child process. The child will be killed when this
139    /// group is dropped.
140    pub fn spawn(&self, command: &mut Command) -> Result<ContainedChild, std::io::Error> {
141        self.spawn_with_containment(command, Containment::Contained)
142    }
143
144    /// Spawn a detached child process. The child will survive this group
145    /// being dropped.
146    pub fn spawn_detached(&self, command: &mut Command) -> Result<ContainedChild, std::io::Error> {
147        self.spawn_with_containment(command, Containment::Detached)
148    }
149
150    /// Spawn a child process with the given containment policy.
151    pub fn spawn_with_containment(
152        &self,
153        command: &mut Command,
154        containment: Containment,
155    ) -> Result<ContainedChild, std::io::Error> {
156        self.inject_originator_env(command);
157
158        #[cfg(windows)]
159        {
160            self.spawn_windows(command, containment)
161        }
162        #[cfg(unix)]
163        {
164            self.spawn_unix(command, containment)
165        }
166    }
167}
168
169// ── Windows implementation ──────────────────────────────────────────────────
170
#[cfg(windows)]
impl ContainedProcessGroup {
    /// Create the backing Job Object and configure it to kill its processes
    /// when the last handle to the job is closed.
    ///
    /// # Errors
    ///
    /// Returns the last OS error if the job cannot be created or configured.
    /// The job handle is closed again when configuration fails.
    fn new_windows(originator: Option<String>) -> Result<Self, std::io::Error> {
        use std::mem::zeroed;
        use winapi::shared::minwindef::FALSE;
        use winapi::um::handleapi::INVALID_HANDLE_VALUE;
        use winapi::um::jobapi2::{CreateJobObjectW, SetInformationJobObject};
        use winapi::um::winnt::{
            JobObjectExtendedLimitInformation, JOBOBJECT_EXTENDED_LIMIT_INFORMATION,
            JOB_OBJECT_LIMIT_BREAKAWAY_OK, JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE,
        };

        // SAFETY: both arguments may be null (default security attributes,
        // anonymous job); the call has no other preconditions.
        let job = unsafe { CreateJobObjectW(std::ptr::null_mut(), std::ptr::null()) };
        if job.is_null() || job == INVALID_HANDLE_VALUE {
            return Err(std::io::Error::last_os_error());
        }

        // SAFETY: the structure is plain data for which all-zero bytes are a
        // valid "no limits" value.
        let mut info: JOBOBJECT_EXTENDED_LIMIT_INFORMATION = unsafe { zeroed() };
        info.BasicLimitInformation.LimitFlags =
            JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE | JOB_OBJECT_LIMIT_BREAKAWAY_OK;
        // SAFETY: `job` is the valid handle created above, and the pointer
        // and size describe the live `info` value.
        let ok = unsafe {
            SetInformationJobObject(
                job,
                JobObjectExtendedLimitInformation,
                (&mut info as *mut JOBOBJECT_EXTENDED_LIMIT_INFORMATION).cast(),
                std::mem::size_of::<JOBOBJECT_EXTENDED_LIMIT_INFORMATION>() as u32,
            )
        };
        if ok == FALSE {
            // Capture the error before CloseHandle can overwrite it.
            let err = std::io::Error::last_os_error();
            // SAFETY: `job` is a valid handle that is not used again.
            unsafe { winapi::um::handleapi::CloseHandle(job) };
            return Err(err);
        }

        Ok(Self {
            originator,
            job: super::WindowsJobHandle(job as usize),
        })
    }

    /// Spawn `command` and, for contained children, assign the new process
    /// to this group's Job Object.
    ///
    /// # Errors
    ///
    /// Returns the spawn error, or the OS error from the job assignment. If
    /// the assignment fails the freshly spawned child is killed and reaped
    /// before the error is returned, so a failed contained spawn never
    /// leaves an uncontained process behind.
    fn spawn_windows(
        &self,
        command: &mut Command,
        containment: Containment,
    ) -> Result<ContainedChild, std::io::Error> {
        use winapi::shared::minwindef::FALSE;
        use winapi::um::jobapi2::AssignProcessToJobObject;

        match containment {
            Containment::Contained => {
                // Spawn the child, then assign it to our Job Object. The
                // child runs briefly before the assignment takes effect.
                let mut child = command.spawn()?;
                let handle = {
                    use std::os::windows::io::AsRawHandle;
                    child.as_raw_handle()
                };
                // SAFETY: the job handle is owned by `self` and `handle`
                // belongs to `child`; both stay alive across the call.
                let ok = unsafe {
                    AssignProcessToJobObject(
                        self.job.0 as winapi::shared::ntdef::HANDLE,
                        handle.cast(),
                    )
                };
                if ok == FALSE {
                    // Capture the error before the cleanup calls below can
                    // overwrite the thread's last-error value.
                    let err = std::io::Error::last_os_error();
                    // The caller only receives the error, so nobody could
                    // stop this process later. Terminate it now (best
                    // effort) rather than leak an uncontained child.
                    let _ = child.kill();
                    let _ = child.wait();
                    return Err(err);
                }
                Ok(ContainedChild { child, containment })
            }
            Containment::Detached => {
                // Detached: simply do NOT assign the child to the Job
                // Object. The child will survive when the job handle is
                // closed (and contained siblings are killed).
                //
                // NOTE: `CREATE_BREAKAWAY_FROM_JOB` is only useful when
                // the *spawning* process is already inside a job and wants
                // to launch a child outside it. Here, our spawning process
                // is not in the job, so we just skip assignment.
                let child = command.spawn()?;
                Ok(ContainedChild { child, containment })
            }
        }
    }
}
253
254// ── Unix implementation ─────────────────────────────────────────────────────
255
256#[cfg(unix)]
257impl ContainedProcessGroup {
258    fn spawn_unix(
259        &self,
260        command: &mut Command,
261        containment: Containment,
262    ) -> Result<ContainedChild, std::io::Error> {
263        use std::os::unix::process::CommandExt;
264
265        match containment {
266            Containment::Contained => {
267                let pgid_lock = self.pgid.lock().expect("pgid mutex poisoned");
268                let target_pgid = *pgid_lock;
269                drop(pgid_lock);
270
271                unsafe {
272                    command.pre_exec(move || {
273                        // Place child into the group's process group, or create
274                        // a new one if this is the first child.
275                        let pgid = target_pgid.unwrap_or(0);
276                        if libc::setpgid(0, pgid) == -1 {
277                            return Err(std::io::Error::last_os_error());
278                        }
279
280                        // Linux-only: ask the kernel to send SIGKILL to this
281                        // child when the parent thread exits.
282                        // NOTE: PR_SET_PDEATHSIG is tied to the calling
283                        // *thread*, not the process. If the thread that spawned
284                        // this child exits, the child receives the signal even
285                        // if the parent process is still alive.
286                        #[cfg(target_os = "linux")]
287                        {
288                            if libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGKILL) == -1 {
289                                return Err(std::io::Error::last_os_error());
290                            }
291                            // Re-check that the parent hasn't already died
292                            // between fork() and prctl().
293                            if libc::getppid() == 1 {
294                                // Parent already exited; init adopted us.
295                                libc::_exit(1);
296                            }
297                        }
298
299                        Ok(())
300                    });
301                }
302
303                let child = command.spawn()?;
304                let pid = child.id();
305
306                // Record the process group ID.
307                let mut pgid_lock = self.pgid.lock().expect("pgid mutex poisoned");
308                let group_pgid = if let Some(existing) = *pgid_lock {
309                    existing
310                } else {
311                    // First child becomes the process group leader.
312                    *pgid_lock = Some(pid as i32);
313                    pid as i32
314                };
315                drop(pgid_lock);
316
317                // Parent-side setpgid: the standard double-setpgid pattern.
318                // Both parent and child call setpgid so the group assignment
319                // is guaranteed regardless of scheduling order.  EACCES is
320                // expected (child already exec'd) and harmless.
321                unsafe {
322                    libc::setpgid(pid as i32, group_pgid);
323                }
324
325                self.child_pids
326                    .lock()
327                    .expect("child_pids mutex poisoned")
328                    .push(pid);
329
330                Ok(ContainedChild { child, containment })
331            }
332            Containment::Detached => {
333                unsafe {
334                    command.pre_exec(|| {
335                        // Create a new session so the child is fully detached.
336                        if libc::setsid() == -1 {
337                            return Err(std::io::Error::last_os_error());
338                        }
339                        Ok(())
340                    });
341                }
342                let child = command.spawn()?;
343                Ok(ContainedChild { child, containment })
344            }
345        }
346    }
347}
348
349#[cfg(unix)]
350impl Drop for ContainedProcessGroup {
351    fn drop(&mut self) {
352        let pgid = self.pgid.lock().expect("pgid mutex poisoned");
353        if let Some(pgid) = *pgid {
354            // Send SIGKILL to the entire process group. Negative PID targets
355            // the group. Errors are ignored (processes may have already exited).
356            unsafe {
357                libc::killpg(pgid, libc::SIGKILL);
358            }
359        }
360        drop(pgid);
361
362        // Fallback: kill each tracked PID individually, in case any child
363        // failed to join the process group (e.g. race between fork and exec).
364        let pids = self.child_pids.lock().expect("child_pids mutex poisoned");
365        for &pid in pids.iter() {
366            unsafe {
367                libc::kill(pid as i32, libc::SIGKILL);
368            }
369        }
370
371        // Reap zombie children.  After SIGKILL, child processes remain as
372        // zombies in the process table until waitpid() is called.  Without
373        // reaping, kill(pid, 0) still reports them as alive and they consume
374        // a slot in the process table.  SIGKILL is unblockable so blocking
375        // waitpid returns essentially immediately.  If the PID is not our
376        // child (or was already reaped), waitpid returns -1/ECHILD which we
377        // safely ignore.
378        for &pid in pids.iter() {
379            unsafe {
380                libc::waitpid(pid as i32, std::ptr::null_mut(), 0);
381            }
382        }
383    }
384}
385
386// Windows: Job Object handle is closed by WindowsJobHandle::drop, which
387// triggers JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE automatically.
388
// ── Tests ───────────────────────────────────────────────────────────────────
390
#[cfg(test)]
mod tests {
    use super::*;

    /// The derived default policy must be `Contained`.
    #[test]
    fn containment_default_is_contained() {
        let policy: Containment = Default::default();
        assert_eq!(policy, Containment::Contained);
    }

    /// `Containment` is `Copy`: the source stays usable after assignment.
    #[test]
    fn containment_clone_and_copy() {
        let source = Containment::Contained;
        let duplicate = source;
        assert_eq!(source, duplicate);
    }

    /// The `Debug` output is the bare variant name.
    #[test]
    fn containment_debug_format() {
        let cases = [
            (Containment::Contained, "Contained"),
            (Containment::Detached, "Detached"),
        ];
        for (policy, rendered) in cases {
            assert_eq!(format!("{:?}", policy), rendered);
        }
    }

    /// A group without an originator can be created.
    #[test]
    fn contained_process_group_creates_successfully() {
        assert!(ContainedProcessGroup::new().is_ok());
    }

    /// A group with an originator can be created and reports its name.
    #[test]
    fn with_originator_creates_successfully() {
        let created = ContainedProcessGroup::with_originator("CLUD");
        assert!(created.is_ok());
        let group = created.unwrap();
        assert_eq!(group.originator(), Some("CLUD"));
    }

    /// The full value is the originator name joined to this process's PID.
    #[test]
    fn originator_value_format() {
        let group = ContainedProcessGroup::with_originator("CLUD").unwrap();
        let pid = std::process::id();
        assert_eq!(group.originator_value().unwrap(), format!("CLUD:{}", pid));
    }

    /// Without an originator neither accessor yields a value.
    #[test]
    fn no_originator_returns_none() {
        let group = ContainedProcessGroup::new().unwrap();
        assert!(group.originator().is_none());
        assert!(group.originator_value().is_none());
    }

    /// The helper emits exactly `TOOL:PID`, split at the first colon.
    #[test]
    fn format_originator_value_correct() {
        let value = format_originator_value("JUPYTER");
        let pieces: Vec<&str> = value.splitn(2, ':').collect();
        assert_eq!(pieces.len(), 2);
        let (tool, pid) = (pieces[0], pieces[1]);
        assert_eq!(tool, "JUPYTER");
        assert_eq!(pid, std::process::id().to_string());
    }
}