Skip to main content

vtcode_bash_runner/
process_group.rs

//! Process-group helpers for reliable child process cleanup.
//!
//! This module centralizes OS-specific pieces that ensure a spawned
//! command can be cleaned up reliably:
//! - `set_process_group` is called in `pre_exec` so the child starts its own
//!   process group.
//! - `detach_from_tty` starts a new session so non-interactive children do not
//!   inherit the controlling TTY.
//! - `kill_process_group_by_pid` targets the whole group (children/grandchildren)
//!   instead of a single PID.
//! - `kill_process_group` targets a known process group ID directly.
//! - `set_parent_death_signal` (Linux only) arranges for the child to receive a
//!   `SIGTERM` when the parent exits, and re-checks the parent PID to avoid
//!   races during fork/exec.
//! - `graceful_kill_process_group` sends SIGTERM, waits for a grace period, then
//!   SIGKILL if still running.
//!
//! On non-Unix platforms these helpers are no-ops or adapted equivalents.
//!
//! Inspired by codex-rs/utils/pty process group management patterns.
21
22use std::io;
23
24#[cfg(unix)]
25use tokio::process::Child;
26
/// Grace period, in milliseconds, used by `graceful_kill_process_group_default`
/// between the initial termination request and the forced kill.
pub const DEFAULT_GRACEFUL_TIMEOUT_MS: u64 = 500;
29
/// Which signal the process-group kill helpers should deliver.
///
/// The default variant is [`KillSignal::Kill`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum KillSignal {
    /// `SIGINT`: the interrupt a terminal sends on Ctrl+C.
    Int,
    /// `SIGTERM`: a termination request the target may handle to shut down cleanly.
    Term,
    /// `SIGKILL`: immediate termination.
    #[default]
    Kill,
}
41
42#[cfg(unix)]
43impl KillSignal {
44    fn as_libc_signal(self) -> libc::c_int {
45        match self {
46            KillSignal::Int => libc::SIGINT,
47            KillSignal::Term => libc::SIGTERM,
48            KillSignal::Kill => libc::SIGKILL,
49        }
50    }
51}
52
53/// Ensure the child receives SIGTERM when the original parent dies.
54///
55/// This should run in `pre_exec` and uses `parent_pid` captured before spawn to
56/// avoid a race where the parent exits between fork and exec.
57///
58/// # Safety
59/// This function uses unsafe libc calls and should only be called from a pre_exec hook.
60#[cfg(target_os = "linux")]
61pub fn set_parent_death_signal(parent_pid: libc::pid_t) -> io::Result<()> {
62    if unsafe { libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM) } == -1 {
63        return Err(io::Error::last_os_error());
64    }
65
66    // Re-check parent PID to avoid race condition where parent exits between fork and exec
67    if unsafe { libc::getppid() } != parent_pid {
68        unsafe {
69            libc::raise(libc::SIGTERM);
70        }
71    }
72
73    Ok(())
74}
75
/// Stand-in for platforms other than Linux: does nothing and always succeeds.
#[cfg(not(target_os = "linux"))]
pub fn set_parent_death_signal(_parent_pid: i32) -> io::Result<()> {
    Ok(())
}
81
82/// Detach from the controlling TTY by starting a new session.
83///
84/// This is useful for spawning background processes that should not receive
85/// signals from the controlling terminal.
86#[cfg(unix)]
87pub fn detach_from_tty() -> io::Result<()> {
88    let result = unsafe { libc::setsid() };
89    if result == -1 {
90        let err = io::Error::last_os_error();
91        // EPERM means we're already a session leader, fall back to setpgid
92        if err.raw_os_error() == Some(libc::EPERM) {
93            return set_process_group();
94        }
95        return Err(err);
96    }
97    Ok(())
98}
99
/// Stand-in for non-Unix platforms: does nothing and always succeeds.
#[cfg(not(unix))]
pub fn detach_from_tty() -> io::Result<()> {
    Ok(())
}
105
106/// Put the calling process into its own process group.
107///
108/// Intended for use in `pre_exec` so the child becomes the group leader.
109#[cfg(unix)]
110pub fn set_process_group() -> io::Result<()> {
111    let result = unsafe { libc::setpgid(0, 0) };
112    if result == -1 {
113        Err(io::Error::last_os_error())
114    } else {
115        Ok(())
116    }
117}
118
/// Stand-in for non-Unix platforms: does nothing and always succeeds.
#[cfg(not(unix))]
pub fn set_process_group() -> io::Result<()> {
    Ok(())
}
124
125/// Kill the process group for the given PID (best-effort).
126///
127/// This resolves the PGID for `pid` and sends SIGKILL to the whole group.
128#[cfg(unix)]
129pub fn kill_process_group_by_pid(pid: u32) -> io::Result<()> {
130    kill_process_group_by_pid_with_signal(pid, KillSignal::Kill)
131}
132
133/// Kill the process group for the given PID with a specific signal.
134#[cfg(unix)]
135pub fn kill_process_group_by_pid_with_signal(pid: u32, signal: KillSignal) -> io::Result<()> {
136    use std::io::ErrorKind;
137
138    let pid = pid as libc::pid_t;
139    let pgid = unsafe { libc::getpgid(pid) };
140    if pgid == -1 {
141        let err = io::Error::last_os_error();
142        if err.kind() != ErrorKind::NotFound {
143            return Err(err);
144        }
145        return Ok(());
146    }
147
148    let result = unsafe { libc::killpg(pgid, signal.as_libc_signal()) };
149    if result == -1 {
150        let err = io::Error::last_os_error();
151        if err.kind() != ErrorKind::NotFound {
152            return Err(err);
153        }
154    }
155
156    Ok(())
157}
158
/// Stand-in for non-Unix platforms: does nothing and always succeeds.
#[cfg(not(unix))]
pub fn kill_process_group_by_pid(_pid: u32) -> io::Result<()> {
    Ok(())
}
164
/// Stand-in for non-Unix platforms: ignores both arguments and always succeeds.
#[cfg(not(unix))]
pub fn kill_process_group_by_pid_with_signal(_pid: u32, _signal: KillSignal) -> io::Result<()> {
    Ok(())
}
170
171/// Kill a specific process group ID (best-effort).
172#[cfg(unix)]
173pub fn kill_process_group(process_group_id: u32) -> io::Result<()> {
174    kill_process_group_with_signal(process_group_id, KillSignal::Kill)
175}
176
177/// Kill a specific process group ID with a specific signal.
178#[cfg(unix)]
179pub fn kill_process_group_with_signal(process_group_id: u32, signal: KillSignal) -> io::Result<()> {
180    use std::io::ErrorKind;
181
182    let pgid = process_group_id as libc::pid_t;
183    let result = unsafe { libc::killpg(pgid, signal.as_libc_signal()) };
184    if result == -1 {
185        let err = io::Error::last_os_error();
186        if err.kind() != ErrorKind::NotFound {
187            return Err(err);
188        }
189    }
190
191    Ok(())
192}
193
/// Stand-in for non-Unix platforms: does nothing and always succeeds.
#[cfg(not(unix))]
pub fn kill_process_group(_process_group_id: u32) -> io::Result<()> {
    Ok(())
}
199
/// Stand-in for non-Unix platforms: ignores both arguments and always succeeds.
#[cfg(not(unix))]
pub fn kill_process_group_with_signal(
    _process_group_id: u32,
    _signal: KillSignal,
) -> io::Result<()> {
    Ok(())
}
208
209/// Kill the process group for a tokio child (best-effort).
210#[cfg(unix)]
211pub fn kill_child_process_group(child: &mut Child) -> io::Result<()> {
212    kill_child_process_group_with_signal(child, KillSignal::Kill)
213}
214
215/// Kill the process group for a tokio child with a specific signal.
216#[cfg(unix)]
217pub fn kill_child_process_group_with_signal(
218    child: &mut Child,
219    signal: KillSignal,
220) -> io::Result<()> {
221    if let Some(pid) = child.id() {
222        return kill_process_group_by_pid_with_signal(pid, signal);
223    }
224
225    Ok(())
226}
227
/// Stand-in for non-Unix platforms: leaves the child untouched and always succeeds.
#[cfg(not(unix))]
pub fn kill_child_process_group(_child: &mut tokio::process::Child) -> io::Result<()> {
    Ok(())
}
233
/// Stand-in for non-Unix platforms: ignores both arguments and always succeeds.
#[cfg(not(unix))]
pub fn kill_child_process_group_with_signal(
    _child: &mut tokio::process::Child,
    _signal: KillSignal,
) -> io::Result<()> {
    Ok(())
}
242
/// Terminate a single process by PID (Windows).
///
/// Opens the process with `PROCESS_TERMINATE` access, calls `TerminateProcess`
/// with exit code 1, and closes the handle again.
///
/// # Errors
/// Returns the OS error if the process cannot be opened or terminated.
#[cfg(windows)]
pub fn kill_process(pid: u32) -> io::Result<()> {
    use winapi::um::handleapi::CloseHandle;
    use winapi::um::processthreadsapi::{OpenProcess, TerminateProcess};
    use winapi::um::winnt::PROCESS_TERMINATE;

    let handle = unsafe { OpenProcess(PROCESS_TERMINATE, 0, pid) };
    if handle.is_null() {
        return Err(io::Error::last_os_error());
    }

    // Capture the failure reason before CloseHandle can overwrite the
    // thread's last-error value.
    let outcome = if unsafe { TerminateProcess(handle, 1) } == 0 {
        Err(io::Error::last_os_error())
    } else {
        Ok(())
    };
    unsafe { CloseHandle(handle) };

    outcome
}
261
/// Stand-in for platforms other than Windows: does nothing and always succeeds.
#[cfg(not(windows))]
pub fn kill_process(_pid: u32) -> io::Result<()> {
    Ok(())
}
267
/// How a graceful termination attempt ended.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GracefulTerminationResult {
    /// The process went away after the initial signal (SIGTERM/SIGINT),
    /// without needing SIGKILL.
    GracefulExit,
    /// The grace period elapsed and the process was killed with SIGKILL.
    ForcefulKill,
    /// The process was not running when termination was attempted.
    AlreadyExited,
    /// The process could not be checked or terminated.
    Error,
}
280
281/// Check if a process (by PID) is still running.
282#[cfg(unix)]
283fn is_process_running(pid: u32) -> bool {
284    // kill with signal 0 checks if process exists without sending a signal
285    let result = unsafe { libc::kill(pid as libc::pid_t, 0) };
286    if result == 0 {
287        return true;
288    }
289    let err = io::Error::last_os_error();
290    // ESRCH = no such process, EPERM = exists but no permission (still running)
291    err.raw_os_error() == Some(libc::EPERM)
292}
293
/// Non-Unix stand-in: no liveness probe is performed here, so every PID is
/// reported as running.
#[cfg(not(unix))]
fn is_process_running(_pid: u32) -> bool {
    true
}
299
300/// Gracefully terminate a process group by PID.
301///
302/// This function implements a staged termination strategy:
303/// 1. Send the initial signal (default: SIGTERM, or SIGINT for interactive processes)
304/// 2. Wait up to `grace_period` for the process to exit
305/// 3. If still running, send SIGKILL
306///
307/// Returns information about how the termination completed.
308///
309/// # Arguments
310/// * `pid` - Process ID (will be used to resolve the process group)
311/// * `initial_signal` - Signal to try first (SIGINT, SIGTERM)
312/// * `grace_period` - How long to wait before SIGKILL
313#[cfg(unix)]
314pub fn graceful_kill_process_group(
315    pid: u32,
316    initial_signal: KillSignal,
317    grace_period: std::time::Duration,
318) -> GracefulTerminationResult {
319    // Check if already exited
320    if !is_process_running(pid) {
321        return GracefulTerminationResult::AlreadyExited;
322    }
323
324    // Resolve PGID
325    let pgid = unsafe { libc::getpgid(pid as libc::pid_t) };
326    if pgid == -1 {
327        // Can't get PGID - process may have already exited
328        return GracefulTerminationResult::AlreadyExited;
329    }
330
331    // Send initial signal (SIGTERM or SIGINT)
332    let signal = match initial_signal {
333        KillSignal::Kill => libc::SIGTERM, // Don't send SIGKILL as initial
334        other => other.as_libc_signal(),
335    };
336
337    if unsafe { libc::killpg(pgid, signal) } == -1 {
338        let err = io::Error::last_os_error();
339        if err.raw_os_error() != Some(libc::ESRCH) {
340            return GracefulTerminationResult::Error;
341        }
342        return GracefulTerminationResult::AlreadyExited;
343    }
344
345    // Wait for graceful exit
346    let deadline = std::time::Instant::now() + grace_period;
347    let poll_interval = std::time::Duration::from_millis(10);
348
349    while std::time::Instant::now() < deadline {
350        if !is_process_running(pid) {
351            return GracefulTerminationResult::GracefulExit;
352        }
353        std::thread::sleep(poll_interval);
354    }
355
356    // Still running - force kill
357    if unsafe { libc::killpg(pgid, libc::SIGKILL) } == -1 {
358        let err = io::Error::last_os_error();
359        if err.raw_os_error() == Some(libc::ESRCH) {
360            // Exited between check and kill
361            return GracefulTerminationResult::GracefulExit;
362        }
363        return GracefulTerminationResult::Error;
364    }
365
366    GracefulTerminationResult::ForcefulKill
367}
368
/// Best-effort graceful termination for non-Unix targets.
///
/// On Windows:
/// 1. If the process cannot be opened for querying, `AlreadyExited` is returned.
/// 2. A console control event is generated for `pid` used as a process-group
///    ID: Ctrl+C for [`KillSignal::Int`], Ctrl+Break for `Term` and `Kill`.
/// 3. If the event was generated, the process is polled every 10 ms for up to
///    `grace_period`; once it can no longer be opened or reports an exit code
///    other than `STILL_ACTIVE`, `GracefulExit` is returned.
/// 4. Otherwise [`kill_process`] is called: `ForcefulKill` on success, `Error`
///    on failure.
///
/// On other non-Unix targets the arguments are ignored and `Error` is returned.
///
/// NOTE(review): the Windows documentation for `GenerateConsoleCtrlEvent`
/// appears to say Ctrl+C is not delivered when a nonzero group ID is given —
/// confirm whether the `Int` path can ever result in a graceful exit.
#[cfg(not(unix))]
pub fn graceful_kill_process_group(
    pid: u32,
    initial_signal: KillSignal,
    grace_period: std::time::Duration,
) -> GracefulTerminationResult {
    #[cfg(windows)]
    {
        use std::time::{Duration, Instant};
        use winapi::um::handleapi::CloseHandle;
        use winapi::um::minwinbase::STILL_ACTIVE;
        use winapi::um::processthreadsapi::{GetExitCodeProcess, OpenProcess};
        use winapi::um::wincon::{CTRL_BREAK_EVENT, CTRL_C_EVENT, GenerateConsoleCtrlEvent};
        use winapi::um::winnt::PROCESS_QUERY_LIMITED_INFORMATION;

        // Opens a short-lived query handle; a null handle is treated by the
        // callers below as "process is gone".
        let open_for_query = || unsafe { OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, 0, pid) };

        let probe = open_for_query();
        if probe.is_null() {
            return GracefulTerminationResult::AlreadyExited;
        }
        unsafe { CloseHandle(probe) };

        let event = match initial_signal {
            KillSignal::Int => CTRL_C_EVENT,
            KillSignal::Term | KillSignal::Kill => CTRL_BREAK_EVENT,
        };

        // The PID doubles as the process-group ID for the console event.
        let delivered = unsafe { GenerateConsoleCtrlEvent(event, pid) } != 0;
        if delivered {
            let deadline = Instant::now() + grace_period;
            let poll_interval = Duration::from_millis(10);

            while Instant::now() < deadline {
                let handle = open_for_query();
                if handle.is_null() {
                    return GracefulTerminationResult::GracefulExit;
                }

                let mut exit_code: u32 = 0;
                let queried = unsafe { GetExitCodeProcess(handle, &mut exit_code) } != 0;
                unsafe { CloseHandle(handle) };

                if queried && exit_code != STILL_ACTIVE {
                    return GracefulTerminationResult::GracefulExit;
                }

                std::thread::sleep(poll_interval);
            }
        }

        // Either the event could not be generated or the grace period ran out.
        if kill_process(pid).is_ok() {
            GracefulTerminationResult::ForcefulKill
        } else {
            GracefulTerminationResult::Error
        }
    }
    #[cfg(not(windows))]
    {
        let _ = (pid, initial_signal, grace_period);
        GracefulTerminationResult::Error
    }
}
449
450/// Gracefully terminate a process group with default settings.
451///
452/// Uses SIGTERM and the default grace period (500ms).
453#[cfg(unix)]
454pub fn graceful_kill_process_group_default(pid: u32) -> GracefulTerminationResult {
455    graceful_kill_process_group(
456        pid,
457        KillSignal::Term,
458        std::time::Duration::from_millis(DEFAULT_GRACEFUL_TIMEOUT_MS),
459    )
460}
461
/// Non-Unix counterpart of the default graceful termination helper.
///
/// Equivalent to [`graceful_kill_process_group`] with [`KillSignal::Term`] and
/// a grace period of [`DEFAULT_GRACEFUL_TIMEOUT_MS`] milliseconds.
#[cfg(not(unix))]
pub fn graceful_kill_process_group_default(pid: u32) -> GracefulTerminationResult {
    let grace_period = std::time::Duration::from_millis(DEFAULT_GRACEFUL_TIMEOUT_MS);
    graceful_kill_process_group(pid, KillSignal::Term, grace_period)
}
471
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: arming the parent-death signal must not panic or kill the
    /// test process.
    #[test]
    fn test_set_parent_death_signal_no_panic() {
        #[cfg(target_os = "linux")]
        {
            // Pass the *real* parent PID. The helper raises SIGTERM in the
            // calling process whenever `getppid()` differs from its argument,
            // so handing it our own PID would terminate the whole test binary.
            let parent_pid = unsafe { libc::getppid() };
            // The call is made outside `pre_exec`, so its result is not
            // asserted; it only has to return.
            let _ = set_parent_death_signal(parent_pid);
        }
        #[cfg(not(target_os = "linux"))]
        {
            assert!(set_parent_death_signal(0).is_ok());
        }
    }

    /// Signalling a process group that does not exist must not panic.
    #[test]
    fn test_kill_nonexistent_process_group() {
        #[cfg(unix)]
        {
            // A very high ID that should not correspond to any live group.
            // Only the absence of a panic is checked here.
            let result = kill_process_group(2_000_000_000);
            let _ = result;
        }
        #[cfg(not(unix))]
        {
            let result = kill_process_group(999_999);
            assert!(result.is_ok());
        }
    }

    /// The variants are distinct and the default is `Kill`.
    #[test]
    fn test_kill_signal_values() {
        assert_ne!(KillSignal::Int, KillSignal::Term);
        assert_ne!(KillSignal::Term, KillSignal::Kill);
        assert_ne!(KillSignal::Int, KillSignal::Kill);

        assert_eq!(KillSignal::default(), KillSignal::Kill);
    }

    /// Every result variant can be formatted with `Debug`.
    #[test]
    fn test_graceful_termination_result_debug() {
        let results = [
            GracefulTerminationResult::GracefulExit,
            GracefulTerminationResult::ForcefulKill,
            GracefulTerminationResult::AlreadyExited,
            GracefulTerminationResult::Error,
        ];
        for result in &results {
            let _ = format!("{result:?}");
        }
    }

    /// Gracefully killing a PID that does not exist reports `AlreadyExited`
    /// on Unix.
    #[test]
    fn test_graceful_kill_nonexistent_process() {
        let result = graceful_kill_process_group_default(2_000_000_000);
        #[cfg(unix)]
        {
            assert_eq!(result, GracefulTerminationResult::AlreadyExited);
        }
        #[cfg(not(unix))]
        {
            // Outcome is platform-dependent; only the absence of a panic matters.
            let _ = result;
        }
    }

    /// The test process itself is always running.
    #[cfg(unix)]
    #[test]
    fn test_is_process_running_self() {
        let pid = std::process::id();
        assert!(is_process_running(pid));
    }

    /// A very high PID should not be running.
    #[cfg(unix)]
    #[test]
    fn test_is_process_running_nonexistent() {
        assert!(!is_process_running(2_000_000_000));
    }
}