Skip to main content

agent_exec/
kill.rs

1//! Implementation of the `kill` sub-command.
2//!
3//! Signals supported: TERM, INT, KILL (case-insensitive).
4//!
5//! Signal mapping on Windows:
6//!   TERM → TerminateJobObject (graceful intent; Windows has no SIGTERM, so
7//!           tree termination is the closest equivalent)
8//!   INT  → TerminateJobObject (same; Windows has no SIGINT for arbitrary PIDs)
9//!   KILL → TerminateJobObject (forced; semantically the same on Windows)
10//!   *    → TerminateJobObject (unknown signals treated as KILL per design.md)
11//!
//! On Windows the supervisor records a `windows_job_name` in `state.json`.
//! When present, `kill` opens that named Job Object directly and terminates
//! it, which stops the entire process tree.  If absent (e.g. the supervisor
//! could not assign the process to a job), `kill` first tries to place the
//! process in a temporary Job Object and terminate that; only if the
//! assignment fails does it fall back to snapshot-based tree enumeration.
17
18use anyhow::Result;
19use tracing::info;
20
21use crate::jobstore::{JobDir, resolve_root};
22use crate::schema::{JobState, JobStateJob, JobStateResult, JobStatus, KillData, Response};
23
/// Arguments accepted by the `kill` sub-command.
#[derive(Debug)]
pub struct KillOpts<'a> {
    /// Identifier of the job to signal.
    pub job_id: &'a str,
    /// Optional job-store root; forwarded to `resolve_root` when the command runs.
    pub root: Option<&'a str>,
    /// Signal name: TERM | INT | KILL (default: TERM).
    pub signal: &'a str,
}

impl Default for KillOpts<'_> {
    /// Empty job id, no root override, and `TERM` as the signal.
    fn default() -> Self {
        Self {
            job_id: "",
            root: None,
            signal: "TERM",
        }
    }
}
42
43/// Execute `kill`: send signal and emit JSON.
44pub fn execute(opts: KillOpts) -> Result<()> {
45    let root = resolve_root(opts.root);
46    let job_dir = JobDir::open(&root, opts.job_id)?;
47
48    let state = job_dir.read_state()?;
49    let signal_upper = opts.signal.to_uppercase();
50
51    if *state.status() != JobStatus::Running {
52        // Already stopped — no-op but still emit JSON.
53        let response = Response::new(
54            "kill",
55            KillData {
56                job_id: opts.job_id.to_string(),
57                signal: signal_upper,
58            },
59        );
60        response.print();
61        return Ok(());
62    }
63
64    if let Some(pid) = state.pid {
65        // On Windows, pass the job name from state.json so kill can use the
66        // named Job Object created by the supervisor for reliable tree termination.
67        #[cfg(windows)]
68        send_signal(pid, &signal_upper, state.windows_job_name.as_deref())?;
69        #[cfg(not(windows))]
70        send_signal(pid, &signal_upper)?;
71
72        info!(job_id = %opts.job_id, pid, signal = %signal_upper, "signal sent");
73
74        // Mark state as killed.
75        let now = crate::run::now_rfc3339_pub();
76        let new_state = JobState {
77            job: JobStateJob {
78                id: opts.job_id.to_string(),
79                status: JobStatus::Killed,
80                started_at: state.started_at().to_string(),
81            },
82            result: JobStateResult {
83                exit_code: None,
84                signal: Some(signal_upper.clone()),
85                duration_ms: None,
86            },
87            pid: Some(pid),
88            finished_at: Some(now.clone()),
89            updated_at: now,
90            windows_job_name: None,
91        };
92        job_dir.write_state(&new_state)?;
93    }
94
95    let response = Response::new(
96        "kill",
97        KillData {
98            job_id: opts.job_id.to_string(),
99            signal: signal_upper,
100        },
101    );
102    response.print();
103    Ok(())
104}
105
106#[cfg(unix)]
107fn send_signal(pid: u32, signal: &str) -> Result<()> {
108    let signum: libc::c_int = match signal {
109        "TERM" => libc::SIGTERM,
110        "INT" => libc::SIGINT,
111        "KILL" => libc::SIGKILL,
112        _ => libc::SIGKILL, // Unknown → KILL (per design.md)
113    };
114    // SAFETY: kill(2) is safe to call with any pid and valid signal number.
115    let ret = unsafe { libc::kill(pid as libc::pid_t, signum) };
116    if ret != 0 {
117        let err = std::io::Error::last_os_error();
118        // ESRCH (3): No such process — already gone, treat as success.
119        if err.raw_os_error() != Some(libc::ESRCH) {
120            return Err(err.into());
121        }
122    }
123    Ok(())
124}
125
/// Windows signal dispatch.
///
/// Signal mapping (per design.md):
/// - TERM/INT/KILL all map to Job Object termination (process tree termination).
/// - Unknown signals are treated as KILL; this is the only case that emits a
///   debug trace, since the resulting action is the same for every signal.
///
/// Strategy:
/// 1. If `job_name` is Some, open the named Job Object and call TerminateJobObject.
/// 2. Otherwise delegate to `send_signal_no_job` for `pid`.
///
/// # Errors
///
/// Fails if the named Job Object cannot be opened or terminated, or if the
/// no-job path fails.
#[cfg(windows)]
fn send_signal(pid: u32, signal: &str, job_name: Option<&str>) -> Result<()> {
    use tracing::debug;
    use windows::Win32::Foundation::CloseHandle;
    use windows::Win32::System::JobObjects::{
        JOB_OBJECT_ALL_ACCESS, OpenJobObjectW, TerminateJobObject,
    };
    use windows::core::HSTRING;

    // Every signal leads to the same action below, so the name only matters
    // for diagnostics: note when an unrecognized one was supplied.
    if !matches!(signal, "TERM" | "INT" | "KILL") {
        debug!(
            signal = signal,
            "unknown signal mapped to KILL (process-tree kill)"
        );
    }

    // Path 2: no named Job Object — try ad-hoc assignment then terminate.
    let Some(name) = job_name else {
        return send_signal_no_job(pid);
    };

    // Path 1: named Job Object created by the supervisor is available.
    let hname = HSTRING::from(name);
    // SAFETY: `hname` outlives the call that reads it, and `job` is a handle
    // opened here that is closed exactly once before leaving the block.
    unsafe {
        let job = OpenJobObjectW(JOB_OBJECT_ALL_ACCESS, false, &hname)
            .map_err(|e| anyhow::anyhow!("OpenJobObjectW({name}) failed: {e}"))?;
        let result = TerminateJobObject(job, 1)
            .map_err(|e| anyhow::anyhow!("TerminateJobObject({name}) failed: {e}"));
        // Close the handle before reporting, whether or not termination worked.
        let _ = CloseHandle(job);
        result
    }
}
175
/// Fallback Windows kill path when no named Job Object is available.
///
/// Opens the target process, creates a temporary anonymous Job Object,
/// assigns the process to it and terminates the job.  If assignment fails
/// (e.g. the process is already a member of another job), falls back to
/// snapshot-based recursive tree termination via `terminate_process_tree`.
///
/// A process that no longer exists (`OpenProcess` reports
/// `ERROR_INVALID_PARAMETER`) is treated as already terminated, matching
/// `terminate_process_tree` and the Unix `ESRCH` handling.
///
/// # Errors
///
/// Fails if the process cannot be opened for any other reason, if the Job
/// Object cannot be created, if `TerminateJobObject` fails, or if the
/// snapshot-based fallback fails.
#[cfg(windows)]
fn send_signal_no_job(pid: u32) -> Result<()> {
    use windows::Win32::Foundation::{CloseHandle, ERROR_INVALID_PARAMETER, HANDLE};
    use windows::Win32::System::JobObjects::{
        AssignProcessToJobObject, CreateJobObjectW, TerminateJobObject,
    };
    use windows::Win32::System::Threading::{OpenProcess, PROCESS_SET_QUOTA, PROCESS_TERMINATE};

    // SAFETY: both handles are obtained from successful API calls in this
    // block and each is closed exactly once on every path that leaves it.
    unsafe {
        // Open the target process.
        let proc_handle: HANDLE =
            match OpenProcess(PROCESS_TERMINATE | PROCESS_SET_QUOTA, false, pid) {
                Ok(handle) => handle,
                // Process already gone — nothing left to terminate.
                Err(e) if e.code() == ERROR_INVALID_PARAMETER.to_hresult() => return Ok(()),
                Err(e) => return Err(e.into()),
            };

        // Create an anonymous Job Object to hold the process (and any
        // children it has already spawned) so they can be terminated together.
        let job: HANDLE = match CreateJobObjectW(None, None) {
            Ok(job) => job,
            Err(e) => {
                // Do not leak the process handle when job creation fails.
                let _ = CloseHandle(proc_handle);
                return Err(e.into());
            }
        };

        // Assignment may fail when the process already belongs to another
        // job object (common when the supervisor itself runs inside a job,
        // e.g. CI environments).
        let assigned = AssignProcessToJobObject(job, proc_handle).is_ok();

        // Per spec.md:55-63, a TerminateJobObject failure must be surfaced as
        // an error because the caller cannot verify tree termination otherwise.
        let terminated = if assigned {
            TerminateJobObject(job, 1)
                .map_err(|e| anyhow::anyhow!("TerminateJobObject failed: {}", e))
        } else {
            Ok(())
        };

        // Single cleanup point for both handles.
        let _ = CloseHandle(proc_handle);
        let _ = CloseHandle(job);

        if assigned {
            terminated
        } else {
            // Fall back to recursive tree termination via snapshot enumeration
            // so that child processes are also killed; its error is propagated
            // because success must not be reported unless the tree is gone.
            terminate_process_tree(pid)
        }
    }
}
227
/// Terminate a process and all its descendants using
/// CreateToolhelp32Snapshot. This is the fallback path when Job Object
/// assignment fails (e.g., nested job objects on older Windows or CI).
///
/// The process list is snapshotted once, the subtree rooted at `root_pid` is
/// collected breadth-first from the parent-pid links, and the collected
/// processes are terminated in reverse order (descendants before the root).
///
/// NOTE(review): parent/child links come straight from the snapshot and are
/// not cross-checked against process creation times — confirm whether pid
/// reuse needs to be guarded against here.
///
/// # Errors
///
/// Returns `Ok(())` only when every process in the collected tree was either
/// terminated or no longer existed. Returns an error if the snapshot cannot
/// be created or read, or if any process in the tree cannot be opened or
/// terminated, because in those cases tree-wide termination cannot be
/// guaranteed (spec.md:55-63 MUST).
#[cfg(windows)]
fn terminate_process_tree(root_pid: u32) -> Result<()> {
    use windows::Win32::Foundation::{CloseHandle, ERROR_INVALID_PARAMETER};
    use windows::Win32::System::Diagnostics::ToolHelp::{
        CreateToolhelp32Snapshot, PROCESSENTRY32, Process32First, Process32Next, TH32CS_SNAPPROCESS,
    };
    use windows::Win32::System::Threading::{OpenProcess, PROCESS_TERMINATE, TerminateProcess};

    let entry_size = std::mem::size_of::<PROCESSENTRY32>() as u32;

    // parent pid → child pids, in snapshot order.
    let mut children: std::collections::HashMap<u32, Vec<u32>> = std::collections::HashMap::new();

    // SAFETY: `snapshot` comes from a successful CreateToolhelp32Snapshot call
    // and is closed on every path; `entry` has `dwSize` set before each call
    // that fills it.
    unsafe {
        // If we cannot take a snapshot we cannot enumerate child processes, so
        // we must return an error rather than silently skip them.
        let snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0)
            .map_err(|e| anyhow::anyhow!("CreateToolhelp32Snapshot failed: {}", e))?;

        let mut entry = PROCESSENTRY32 {
            dwSize: entry_size,
            ..Default::default()
        };

        // The same applies when the snapshot cannot be read: without the
        // process list the children are unknown, so report the failure
        // instead of terminating only the root and claiming success.
        if let Err(e) = Process32First(snapshot, &mut entry) {
            let _ = CloseHandle(snapshot);
            return Err(anyhow::anyhow!("Process32First failed: {}", e));
        }

        loop {
            children
                .entry(entry.th32ParentProcessID)
                .or_default()
                .push(entry.th32ProcessID);
            entry = PROCESSENTRY32 {
                dwSize: entry_size,
                ..Default::default()
            };
            // An error from Process32Next ends the enumeration.
            if Process32Next(snapshot, &mut entry).is_err() {
                break;
            }
        }
        let _ = CloseHandle(snapshot);
    }

    // Collect all pids in the subtree rooted at root_pid (BFS).  `seen`
    // guards against visiting a pid twice (e.g. an entry whose parent pid is
    // its own pid).
    let mut seen: std::collections::HashSet<u32> = std::collections::HashSet::new();
    seen.insert(root_pid);
    let mut to_kill: Vec<u32> = vec![root_pid];
    let mut next = 0;
    while next < to_kill.len() {
        if let Some(kids) = children.get(&to_kill[next]) {
            for &kid in kids {
                if seen.insert(kid) {
                    to_kill.push(kid);
                }
            }
        }
        next += 1;
    }

    // Terminate all collected processes (children first, then root).
    // Per spec.md:55-63, tree-wide termination is a MUST.  Every process
    // in the subtree must be confirmed terminated; failure to terminate
    // any process (root or child) returns an error unless the process no
    // longer exists (already terminated, which is a success condition).
    //
    // SAFETY: each handle comes from a successful OpenProcess call and is
    // closed exactly once, right after the termination attempt.
    unsafe {
        for &target_pid in to_kill.iter().rev() {
            match OpenProcess(PROCESS_TERMINATE, false, target_pid) {
                Ok(h) => {
                    let result = TerminateProcess(h, 1);
                    let _ = CloseHandle(h);
                    result.map_err(|e| {
                        anyhow::anyhow!("TerminateProcess for pid {} failed: {}", target_pid, e)
                    })?;
                }
                Err(e) => {
                    // ERROR_INVALID_PARAMETER (87) means the process no longer
                    // exists — it has already exited, which is a success
                    // condition (the process is gone).  Any other error means
                    // we could not open the process handle and therefore cannot
                    // confirm or perform termination, which violates the MUST.
                    if e.code() != ERROR_INVALID_PARAMETER.to_hresult() {
                        return Err(anyhow::anyhow!(
                            "OpenProcess for pid {} failed (process may still be running): {}",
                            target_pid,
                            e
                        ));
                    }
                    // Process already gone — treat as success.
                }
            }
        }
    }
    Ok(())
}
320
/// Stub for targets that are neither Unix nor Windows: no kill mechanism is
/// implemented for them, so this always returns an error.
#[cfg(not(any(unix, windows)))]
fn send_signal(_pid: u32, _signal: &str) -> Result<()> {
    Err(anyhow::anyhow!("kill not supported on this platform"))
}