ralph_workflow/pipeline/idle_timeout/
kill.rs1use crate::executor::{AgentChild, ProcessExecutor};
4use std::sync::{Arc, Mutex};
5use std::time::Duration;
6
/// Outcome reported by `kill_process` after it has tried to stop a process.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum KillResult {
    /// The polite termination request was enough.
    ///
    /// On Unix this is returned when the child is observed to have exited
    /// during the SIGTERM grace window, and also (unconfirmed) when SIGTERM was
    /// delivered but no child handle was supplied to poll.
    TerminatedByTerm,

    /// The process went away after a forced kill (SIGKILL on Unix,
    /// `taskkill /F` on Windows).
    ///
    /// On Windows this is also returned, unconfirmed, when no child handle was
    /// supplied to poll.
    TerminatedByKill,

    /// The kill commands succeeded, but the child had not been observed to exit
    /// by the time the confirmation window ran out.
    SignalsSentAwaitingExit {
        /// Every construction site in this file sets this to `true`;
        /// presumably it records that the forced-kill step was reached —
        /// confirm against the consumers of this enum.
        escalated: bool,
    },

    /// The kill command could not be run, or it reported failure.
    Failed,
}
24
/// Timing knobs for the process-termination sequence.
///
/// Fields are private; values are supplied through `KillConfig::new` and read
/// back through the accessor of the same name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct KillConfig {
    /// How long the Unix `kill_process` waits after SIGTERM before it escalates
    /// to SIGKILL.
    sigterm_grace: Duration,

    /// Pause between successive `try_wait` polls of the child.
    poll_interval: Duration,

    /// How long `kill_process` keeps polling for exit after the forced kill
    /// before it gives up and reports `SignalsSentAwaitingExit`.
    sigkill_confirm_timeout: Duration,

    /// Not read anywhere in this file; presumably an upper bound a caller
    /// applies after the forced kill — confirm against the caller.
    post_sigkill_hard_cap: Duration,

    /// Not read anywhere in this file; presumably how often a caller re-sends
    /// the forced kill while waiting — confirm against the caller.
    sigkill_resend_interval: Duration,
}
33
34impl KillConfig {
35 #[must_use]
36 pub const fn new(
37 sigterm_grace: Duration,
38 poll_interval: Duration,
39 sigkill_confirm_timeout: Duration,
40 post_sigkill_hard_cap: Duration,
41 sigkill_resend_interval: Duration,
42 ) -> Self {
43 Self {
44 sigterm_grace,
45 poll_interval,
46 sigkill_confirm_timeout,
47 post_sigkill_hard_cap,
48 sigkill_resend_interval,
49 }
50 }
51
52 #[must_use]
53 pub const fn sigterm_grace(&self) -> Duration {
54 self.sigterm_grace
55 }
56
57 #[must_use]
58 pub const fn poll_interval(&self) -> Duration {
59 self.poll_interval
60 }
61
62 #[must_use]
63 pub const fn sigkill_confirm_timeout(&self) -> Duration {
64 self.sigkill_confirm_timeout
65 }
66
67 #[must_use]
68 pub const fn post_sigkill_hard_cap(&self) -> Duration {
69 self.post_sigkill_hard_cap
70 }
71
72 #[must_use]
73 pub const fn sigkill_resend_interval(&self) -> Duration {
74 self.sigkill_resend_interval
75 }
76}
77
78pub const DEFAULT_KILL_CONFIG: KillConfig = KillConfig::new(
86 Duration::from_secs(5),
87 Duration::from_millis(100),
88 Duration::from_millis(500),
89 Duration::from_secs(5),
90 Duration::from_secs(1),
91);
92
93#[cfg(unix)]
94pub fn force_kill_best_effort(pid: u32, executor: &dyn ProcessExecutor) -> bool {
95 let pid_str = pid.to_string();
96 let process_group_id = format!("-{pid_str}");
97
98 let group_ok = executor
101 .execute("kill", &["-KILL", "--", &process_group_id], &[], None)
102 .map(|o| o.status.success())
103 .unwrap_or(false);
104
105 if group_ok {
106 return true;
107 }
108
109 executor
110 .execute("kill", &["-KILL", &pid_str], &[], None)
111 .map(|o| o.status.success())
112 .unwrap_or(false)
113}
114
/// Force-kills `pid` through the external `taskkill` command (Windows).
///
/// `taskkill` is invoked with `/F /T /PID <pid>`.
///
/// Returns `true` when the command ran and reported success, `false` when it
/// reported failure or could not be executed. Execution errors are
/// deliberately swallowed: this is a best-effort operation.
#[cfg(windows)]
pub(crate) fn force_kill_best_effort(pid: u32, executor: &dyn ProcessExecutor) -> bool {
    let pid_arg = pid.to_string();

    match executor.execute("taskkill", &["/F", "/T", "/PID", &pid_arg], &[], None) {
        Ok(output) => output.status.success(),
        Err(_) => false,
    }
}
127
128#[cfg(unix)]
133pub fn kill_process(
134 pid: u32,
135 executor: &dyn ProcessExecutor,
136 child: Option<&Arc<Mutex<Box<dyn AgentChild>>>>,
137 config: KillConfig,
138) -> KillResult {
139 let pid_str = pid.to_string();
140 let process_group_id = format!("-{pid_str}");
141
142 let term_ok = executor
144 .execute("kill", &["-TERM", "--", &process_group_id], &[], None)
145 .map(|o| o.status.success())
146 .unwrap_or(false)
147 || executor
148 .execute("kill", &["-TERM", &pid_str], &[], None)
149 .map(|o| o.status.success())
150 .unwrap_or(false);
151
152 if !term_ok {
153 return KillResult::Failed;
154 }
155
156 if let Some(child_arc) = child {
157 let grace_deadline = std::time::Instant::now() + config.sigterm_grace;
158 while std::time::Instant::now() < grace_deadline {
159 let status = {
160 let mut locked_child = child_arc
161 .lock()
162 .expect("child process mutex poisoned - indicates panic in another thread");
163 locked_child.try_wait()
164 };
165
166 match status {
167 Ok(Some(_)) => return KillResult::TerminatedByTerm,
168 Ok(None) | Err(_) => std::thread::sleep(config.poll_interval),
169 }
170 }
171
172 let kill_ok = executor
173 .execute("kill", &["-KILL", "--", &process_group_id], &[], None)
174 .map(|o| o.status.success())
175 .unwrap_or(false)
176 || executor
177 .execute("kill", &["-KILL", &pid_str], &[], None)
178 .map(|o| o.status.success())
179 .unwrap_or(false);
180 if !kill_ok {
181 return KillResult::Failed;
182 }
183
184 let confirm_deadline = std::time::Instant::now() + config.sigkill_confirm_timeout;
185 while std::time::Instant::now() < confirm_deadline {
186 let status = {
187 let mut locked_child = child_arc
188 .lock()
189 .expect("child process mutex poisoned - indicates panic in another thread");
190 locked_child.try_wait()
191 };
192
193 match status {
194 Ok(Some(_)) => return KillResult::TerminatedByKill,
195 Ok(None) | Err(_) => std::thread::sleep(config.poll_interval),
196 }
197 }
198
199 return KillResult::SignalsSentAwaitingExit { escalated: true };
200 }
201
202 KillResult::TerminatedByTerm
203}
204
/// Terminates `pid` on Windows with a single forced `taskkill`.
///
/// `taskkill /F /T /PID <pid>` is run through `executor`. There is no graceful
/// phase on this platform, so `config.sigterm_grace` is not consulted.
///
/// Returns:
/// - [`KillResult::Failed`] when `taskkill` cannot be run or reports failure;
/// - [`KillResult::TerminatedByKill`] when the child is observed to have exited
///   within `config.sigkill_confirm_timeout`, or (unconfirmed) when no `child`
///   handle was supplied;
/// - [`KillResult::SignalsSentAwaitingExit`] with `escalated: true` when the
///   child had not been observed to exit by the end of the confirmation window.
///
/// The confirmation wait polls at least once, never sleeps past its deadline,
/// and polls one final time when the deadline is reached, so an exit that
/// happens during the last sleep is still reported as `TerminatedByKill`.
///
/// # Panics
///
/// Panics if the `child` mutex is poisoned.
#[cfg(windows)]
pub(crate) fn kill_process(
    pid: u32,
    executor: &dyn ProcessExecutor,
    child: Option<&Arc<Mutex<Box<dyn AgentChild>>>>,
    config: KillConfig,
) -> KillResult {
    /// Polls `child` until it reports an exit or `window` has elapsed.
    fn child_exited_within(
        child: &Mutex<Box<dyn AgentChild>>,
        window: Duration,
        poll_interval: Duration,
    ) -> bool {
        let deadline = std::time::Instant::now() + window;
        loop {
            let exited = {
                // Hold the lock only for the duration of the poll itself.
                let mut locked_child = child
                    .lock()
                    .expect("child process mutex poisoned - indicates panic in another thread");
                let status = locked_child.try_wait();
                // A `try_wait` error is treated like "still running".
                matches!(status, Ok(Some(_)))
            };
            if exited {
                return true;
            }

            let remaining = deadline.saturating_duration_since(std::time::Instant::now());
            if remaining.is_zero() {
                return false;
            }
            // Clamp the sleep so the final poll lands on the deadline instead
            // of up to one full `poll_interval` after it.
            std::thread::sleep(poll_interval.min(remaining));
        }
    }

    let pid_arg = pid.to_string();
    let kill_ok = executor
        .execute("taskkill", &["/F", "/T", "/PID", &pid_arg], &[], None)
        .map(|o| o.status.success())
        .unwrap_or(false);
    if !kill_ok {
        return KillResult::Failed;
    }

    match child {
        Some(child_arc) => {
            if child_exited_within(
                child_arc,
                config.sigkill_confirm_timeout,
                config.poll_interval,
            ) {
                KillResult::TerminatedByKill
            } else {
                KillResult::SignalsSentAwaitingExit { escalated: true }
            }
        }
        // No handle to poll: report the forced kill as delivered.
        None => KillResult::TerminatedByKill,
    }
}