agent_exec/kill.rs
1//! Implementation of the `kill` sub-command.
2//!
3//! Signals supported: TERM, INT, KILL (case-insensitive).
4//!
5//! Signal mapping on Windows:
6//! TERM → TerminateJobObject (graceful intent; Windows has no SIGTERM, so
7//! tree termination is the closest equivalent)
8//! INT → TerminateJobObject (same; Windows has no SIGINT for arbitrary PIDs)
9//! KILL → TerminateJobObject (forced; semantically the same on Windows)
10//! * → TerminateJobObject (unknown signals treated as KILL per design.md)
11//!
//! On Windows the supervisor records a `windows_job_name` in `state.json`.
//! When present, `kill` opens that named Job Object directly and terminates
//! it, which stops the entire process tree. If absent (e.g. the supervisor
//! could not assign the process to a job), `kill` first tries to assign the
//! process to a temporary Job Object and terminate that; if the assignment
//! fails, it falls back to snapshot-based tree enumeration.
17
18use anyhow::Result;
19use tracing::info;
20
21use crate::jobstore::{JobDir, resolve_root};
22use crate::schema::{JobState, JobStateJob, JobStateResult, JobStatus, KillData, Response};
23
/// Arguments accepted by the `kill` sub-command.
///
/// Every field borrows from the caller for the lifetime `'a`.
#[derive(Debug)]
pub struct KillOpts<'a> {
    /// Identifier of the job to signal.
    pub job_id: &'a str,
    /// Optional job-store root, forwarded to `resolve_root` by `execute`.
    pub root: Option<&'a str>,
    /// Signal name: TERM | INT | KILL (default: TERM).
    pub signal: &'a str,
}

impl Default for KillOpts<'_> {
    /// Builds options with an empty job id, no explicit root, and the `TERM`
    /// signal.
    fn default() -> Self {
        Self {
            job_id: "",
            root: None,
            signal: "TERM",
        }
    }
}
42
43/// Execute `kill`: send signal and emit JSON.
44pub fn execute(opts: KillOpts) -> Result<()> {
45 let root = resolve_root(opts.root);
46 let job_dir = JobDir::open(&root, opts.job_id)?;
47
48 let state = job_dir.read_state()?;
49 let signal_upper = opts.signal.to_uppercase();
50
51 if *state.status() != JobStatus::Running {
52 // Already stopped — no-op but still emit JSON.
53 let response = Response::new(
54 "kill",
55 KillData {
56 job_id: opts.job_id.to_string(),
57 signal: signal_upper,
58 },
59 );
60 response.print();
61 return Ok(());
62 }
63
64 if let Some(pid) = state.pid {
65 // On Windows, pass the job name from state.json so kill can use the
66 // named Job Object created by the supervisor for reliable tree termination.
67 #[cfg(windows)]
68 send_signal(pid, &signal_upper, state.windows_job_name.as_deref())?;
69 #[cfg(not(windows))]
70 send_signal(pid, &signal_upper)?;
71
72 info!(job_id = %opts.job_id, pid, signal = %signal_upper, "signal sent");
73
74 // Mark state as killed.
75 let now = crate::run::now_rfc3339_pub();
76 let new_state = JobState {
77 job: JobStateJob {
78 id: opts.job_id.to_string(),
79 status: JobStatus::Killed,
80 started_at: state.started_at().to_string(),
81 },
82 result: JobStateResult {
83 exit_code: None,
84 signal: Some(signal_upper.clone()),
85 duration_ms: None,
86 },
87 pid: Some(pid),
88 finished_at: Some(now.clone()),
89 updated_at: now,
90 windows_job_name: None,
91 };
92 job_dir.write_state(&new_state)?;
93 }
94
95 let response = Response::new(
96 "kill",
97 KillData {
98 job_id: opts.job_id.to_string(),
99 signal: signal_upper,
100 },
101 );
102 response.print();
103 Ok(())
104}
105
106#[cfg(unix)]
107fn send_signal(pid: u32, signal: &str) -> Result<()> {
108 let signum: libc::c_int = match signal {
109 "TERM" => libc::SIGTERM,
110 "INT" => libc::SIGINT,
111 "KILL" => libc::SIGKILL,
112 _ => libc::SIGKILL, // Unknown → KILL (per design.md)
113 };
114 // SAFETY: kill(2) is safe to call with any pid and valid signal number.
115 let ret = unsafe { libc::kill(pid as libc::pid_t, signum) };
116 if ret != 0 {
117 let err = std::io::Error::last_os_error();
118 // ESRCH (3): No such process — already gone, treat as success.
119 if err.raw_os_error() != Some(libc::ESRCH) {
120 return Err(err.into());
121 }
122 }
123 Ok(())
124}
125
/// Windows signal dispatch.
///
/// Signal mapping (per design.md):
/// - TERM/INT/KILL all map to Job Object termination (process tree termination).
/// - Unknown signals are treated as KILL (same as design.md specifies).
///
/// Strategy:
/// 1. If `job_name` is Some, open the named Job Object and call TerminateJobObject.
/// 2. Otherwise delegate to `send_signal_no_job`, which tries a temporary Job
///    Object first and falls back to snapshot-based tree enumeration starting
///    at `pid`.
///
/// # Errors
///
/// Returns an error when the named Job Object cannot be opened or terminated,
/// or when the no-job fallback fails.
#[cfg(windows)]
fn send_signal(pid: u32, signal: &str, job_name: Option<&str>) -> Result<()> {
    use tracing::debug;
    use windows::Win32::Foundation::CloseHandle;

    // Log the signal mapping for observability.
    let mapping = match signal {
        "TERM" => "TerminateJobObject (TERM→process-tree kill)",
        "INT" => "TerminateJobObject (INT→process-tree kill)",
        "KILL" => "TerminateJobObject (KILL→process-tree kill)",
        other => {
            debug!(
                signal = other,
                "unknown signal mapped to KILL (process-tree kill)"
            );
            "TerminateJobObject (unknown→process-tree kill)"
        }
    };
    debug!(pid, signal, mapping, "dispatching signal on Windows");

    // Path 1: named Job Object created by the supervisor is available.
    if let Some(name) = job_name {
        use windows::Win32::System::JobObjects::{
            JOB_OBJECT_ALL_ACCESS, OpenJobObjectW, TerminateJobObject,
        };
        use windows::core::HSTRING;

        let hname = HSTRING::from(name);
        // SAFETY: `hname` outlives the call that borrows it, and `job` is a
        // handle opened here that is used once and then closed exactly once.
        unsafe {
            let job = OpenJobObjectW(JOB_OBJECT_ALL_ACCESS, false, &hname)
                .map_err(|e| anyhow::anyhow!("OpenJobObjectW({name}) failed: {e}"))?;
            let result = TerminateJobObject(job, 1)
                .map_err(|e| anyhow::anyhow!("TerminateJobObject({name}) failed: {e}"));
            // Close the handle whether or not termination succeeded.
            let _ = CloseHandle(job);
            return result;
        }
    }

    // Path 2: no named Job Object — try ad-hoc assignment then terminate.
    send_signal_no_job(pid)
}
175
/// Fallback Windows kill path when no named Job Object is available.
///
/// Attempts to create a temporary Job Object, assign the process, and terminate.
/// If assignment fails (process already in another job), falls back to
/// snapshot-based recursive tree termination via `terminate_process_tree`.
///
/// Every handle opened here is closed on every exit path, including the
/// early return taken when the Job Object cannot be created.
///
/// # Errors
///
/// Returns an error when the process cannot be opened, the Job Object cannot
/// be created, `TerminateJobObject` fails, or the snapshot fallback fails.
#[cfg(windows)]
fn send_signal_no_job(pid: u32) -> Result<()> {
    use windows::Win32::Foundation::{CloseHandle, HANDLE};
    use windows::Win32::System::JobObjects::{
        AssignProcessToJobObject, CreateJobObjectW, TerminateJobObject,
    };
    use windows::Win32::System::Threading::{OpenProcess, PROCESS_SET_QUOTA, PROCESS_TERMINATE};

    // SAFETY: both handles are obtained from the OS in this block, used only
    // while open, and closed exactly once on each path out of the block.
    unsafe {
        // Open the target process.
        let proc_handle: HANDLE = OpenProcess(PROCESS_TERMINATE | PROCESS_SET_QUOTA, false, pid)?;

        // Create an anonymous Job Object and assign the process to it, then
        // terminate all processes in the job (the target process and any
        // children it has already spawned).
        let job: HANDLE = match CreateJobObjectW(None, None) {
            Ok(job) => job,
            Err(e) => {
                // Do not leak the process handle when job creation fails.
                let _ = CloseHandle(proc_handle);
                return Err(e.into());
            }
        };

        // Assign process to the job (if it is already in a job this may fail,
        // e.g. when the process is already a member of another job object).
        // In either case, we must guarantee process-tree termination per spec.
        if AssignProcessToJobObject(job, proc_handle).is_err() {
            // The process belongs to an existing job object (common when the
            // supervisor itself runs inside a job, e.g. CI environments).
            // Fall back to recursive tree termination via snapshot enumeration
            // so that child processes are also killed, fulfilling the MUST
            // requirement from spec.md:55-63.
            let _ = CloseHandle(job);
            let _ = CloseHandle(proc_handle);
            // Propagate error if tree termination fails — success must not be
            // returned unless the entire process tree is actually terminated.
            return terminate_process_tree(pid);
        }

        // Terminate all processes in the job (process tree).
        // Per spec.md:55-63, failure here must be surfaced as an error because
        // the caller cannot verify tree termination otherwise.
        let terminated = TerminateJobObject(job, 1);

        // Release both handles before reporting the outcome.
        let _ = CloseHandle(proc_handle);
        let _ = CloseHandle(job);

        terminated.map_err(|e| anyhow::anyhow!("TerminateJobObject failed: {}", e))
    }
}
227
/// Terminate a process and all its descendants using
/// CreateToolhelp32Snapshot. This is the fallback path when Job Object
/// assignment fails (e.g., nested job objects on older Windows or CI).
///
/// The subtree is discovered from a single process snapshot by following
/// parent-pid links breadth-first from `root_pid`, then terminated in reverse
/// discovery order (descendants before the root).
///
/// # Errors
///
/// Returns `Ok(())` only when every process in the discovered subtree was
/// either terminated or no longer existed. Returns an error if the snapshot
/// cannot be created or enumerated, if a process cannot be opened for a
/// reason other than having already exited, or if `TerminateProcess` fails,
/// because in those cases tree-wide termination cannot be guaranteed
/// (spec.md:55-63 MUST).
#[cfg(windows)]
fn terminate_process_tree(root_pid: u32) -> Result<()> {
    use windows::Win32::Foundation::{CloseHandle, ERROR_INVALID_PARAMETER};
    use windows::Win32::System::Diagnostics::ToolHelp::{
        CreateToolhelp32Snapshot, PROCESSENTRY32, Process32First, Process32Next, TH32CS_SNAPPROCESS,
    };
    use windows::Win32::System::Threading::{OpenProcess, PROCESS_TERMINATE, TerminateProcess};

    // SAFETY: every handle used below is obtained from the OS in this block
    // and closed exactly once; `entry` always has `dwSize` initialised before
    // it is handed to the ToolHelp enumeration functions.
    unsafe {
        // Build a list of (pid, parent_pid) for all running processes.
        // If we cannot take a snapshot we cannot enumerate child processes, so
        // we must return an error rather than silently skip them.
        let snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0)
            .map_err(|e| anyhow::anyhow!("CreateToolhelp32Snapshot failed: {}", e))?;

        let mut entries: Vec<(u32, u32)> = Vec::new();
        let mut entry = PROCESSENTRY32 {
            dwSize: std::mem::size_of::<PROCESSENTRY32>() as u32,
            ..Default::default()
        };

        // A failure to read even the first entry means no children can be
        // discovered; surface it instead of killing only the root and
        // reporting success.
        if let Err(e) = Process32First(snapshot, &mut entry) {
            let _ = CloseHandle(snapshot);
            return Err(anyhow::anyhow!("Process32First failed: {}", e));
        }
        loop {
            entries.push((entry.th32ProcessID, entry.th32ParentProcessID));
            entry = PROCESSENTRY32 {
                dwSize: std::mem::size_of::<PROCESSENTRY32>() as u32,
                ..Default::default()
            };
            // An error from Process32Next ends the walk.
            if Process32Next(snapshot, &mut entry).is_err() {
                break;
            }
        }
        let _ = CloseHandle(snapshot);

        // Collect all pids in the subtree rooted at root_pid (BFS). The
        // `contains` check prevents revisiting a pid, so self-parented or
        // cyclic parent links cannot loop forever.
        let mut to_kill: Vec<u32> = vec![root_pid];
        let mut i = 0;
        while i < to_kill.len() {
            let parent = to_kill[i];
            for &(child_pid, parent_pid) in &entries {
                if parent_pid == parent && !to_kill.contains(&child_pid) {
                    to_kill.push(child_pid);
                }
            }
            i += 1;
        }

        // Terminate all collected processes (children first, then root).
        // Per spec.md:55-63, tree-wide termination is a MUST. Every process
        // in the subtree must be confirmed terminated; failure to terminate
        // any process (root or child) returns an error unless the process no
        // longer exists (already terminated, which is a success condition).
        for &target_pid in to_kill.iter().rev() {
            match OpenProcess(PROCESS_TERMINATE, false, target_pid) {
                Ok(h) => {
                    let result = TerminateProcess(h, 1);
                    let _ = CloseHandle(h);
                    result.map_err(|e| {
                        anyhow::anyhow!("TerminateProcess for pid {} failed: {}", target_pid, e)
                    })?;
                }
                Err(e) => {
                    // ERROR_INVALID_PARAMETER (87) means the process no longer
                    // exists — it has already exited, which is a success
                    // condition (the process is gone). Any other error means
                    // we could not open the process handle and therefore cannot
                    // confirm or perform termination, which violates the MUST.
                    if e.code() != ERROR_INVALID_PARAMETER.to_hresult() {
                        return Err(anyhow::anyhow!(
                            "OpenProcess for pid {} failed (process may still be running): {}",
                            target_pid,
                            e
                        ));
                    }
                    // Process already gone — treat as success.
                }
            }
        }
    }
    Ok(())
}
320
/// Stub for targets that are neither Unix nor Windows: signal delivery is
/// unavailable there, so every call fails.
#[cfg(not(any(unix, windows)))]
fn send_signal(_pid: u32, _signal: &str) -> Result<()> {
    Err(anyhow::anyhow!("kill not supported on this platform"))
}