ralph_workflow/pipeline/idle_timeout/
kill.rs1use crate::executor::{AgentChild, ProcessExecutor};
4use std::sync::{Arc, Mutex};
5use std::time::Duration;
6
/// Outcome of an attempt to terminate an agent process via `kill_process`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum KillResult {
    /// The child was observed to exit while waiting out the grace period that
    /// follows the initial SIGTERM.
    ///
    /// On Unix this is also returned when SIGTERM was delivered but no child
    /// handle was supplied, so the exit could not be confirmed.
    TerminatedByTerm,

    /// The child was observed to exit after the forceful kill (SIGKILL on
    /// Unix, `taskkill /F` on Windows).
    ///
    /// On Windows this is also returned when `taskkill` succeeded but no child
    /// handle was supplied, so the exit could not be confirmed.
    TerminatedByKill,

    /// The termination commands were issued successfully, but the child had
    /// not been observed to exit when the confirmation window elapsed.
    SignalsSentAwaitingExit {
        /// Whether the forceful kill was sent. Every construction site in this
        /// file sets it to `true`.
        escalated: bool,
    },

    /// A required signal/termination command could not be delivered (the
    /// command failed to run or reported a non-success exit status).
    Failed,
}
24
/// Timing parameters that govern how `kill_process` terminates a process.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct KillConfig {
    /// On Unix, how long to poll for the child to exit after SIGTERM before
    /// escalating to SIGKILL.
    sigterm_grace: Duration,
    /// Pause between successive `try_wait` polls of the child.
    poll_interval: Duration,
    /// How long to poll for the child to exit after the forceful kill
    /// (SIGKILL on Unix, `taskkill /F` on Windows).
    sigkill_confirm_timeout: Duration,
    /// NOTE(review): not read by the code in this file; presumably an upper
    /// bound on any further waiting done by callers after SIGKILL - confirm.
    post_sigkill_hard_cap: Duration,
    /// NOTE(review): not read by the code in this file; presumably how often
    /// callers re-send SIGKILL while still waiting - confirm.
    sigkill_resend_interval: Duration,
}

impl KillConfig {
    /// Builds a configuration from its five timing values.
    ///
    /// Arguments are positional and all have the same type, so take care to
    /// pass them in declaration order: `sigterm_grace`, `poll_interval`,
    /// `sigkill_confirm_timeout`, `post_sigkill_hard_cap`,
    /// `sigkill_resend_interval`.
    pub const fn new(
        sigterm_grace: Duration,
        poll_interval: Duration,
        sigkill_confirm_timeout: Duration,
        post_sigkill_hard_cap: Duration,
        sigkill_resend_interval: Duration,
    ) -> Self {
        Self {
            sigterm_grace,
            poll_interval,
            sigkill_confirm_timeout,
            post_sigkill_hard_cap,
            sigkill_resend_interval,
        }
    }

    // The accessors are `const fn`, like `new`, so that values of a `const`
    // configuration can also be read in const contexts.

    /// Returns the grace period granted after SIGTERM.
    pub const fn sigterm_grace(&self) -> Duration {
        self.sigterm_grace
    }

    /// Returns the pause between exit polls.
    pub const fn poll_interval(&self) -> Duration {
        self.poll_interval
    }

    /// Returns how long to wait for exit confirmation after the forceful kill.
    pub const fn sigkill_confirm_timeout(&self) -> Duration {
        self.sigkill_confirm_timeout
    }

    /// Returns the configured post-SIGKILL hard cap.
    pub const fn post_sigkill_hard_cap(&self) -> Duration {
        self.post_sigkill_hard_cap
    }

    /// Returns the configured SIGKILL resend interval.
    pub const fn sigkill_resend_interval(&self) -> Duration {
        self.sigkill_resend_interval
    }
}
71
72pub const DEFAULT_KILL_CONFIG: KillConfig = KillConfig::new(
80 Duration::from_secs(5),
81 Duration::from_millis(100),
82 Duration::from_millis(500),
83 Duration::from_secs(5),
84 Duration::from_secs(1),
85);
86
87#[cfg(unix)]
88pub(crate) fn force_kill_best_effort(pid: u32, executor: &dyn ProcessExecutor) -> bool {
89 let pid_str = pid.to_string();
90 let pgid_str = format!("-{pid_str}");
91
92 let group_ok = executor
95 .execute("kill", &["-KILL", "--", &pgid_str], &[], None)
96 .map(|o| o.status.success())
97 .unwrap_or(false);
98
99 if group_ok {
100 return true;
101 }
102
103 executor
104 .execute("kill", &["-KILL", &pid_str], &[], None)
105 .map(|o| o.status.success())
106 .unwrap_or(false)
107}
108
/// Forcefully terminates `pid` and its child processes without waiting for
/// them to exit, by running `taskkill /F /T /PID <pid>`.
///
/// Returns `true` if `taskkill` ran and reported success, and `false`
/// otherwise (including when the command could not be executed).
#[cfg(windows)]
pub(crate) fn force_kill_best_effort(pid: u32, executor: &dyn ProcessExecutor) -> bool {
    let target = pid.to_string();
    let taskkill_args = ["/F", "/T", "/PID", target.as_str()];

    match executor.execute("taskkill", &taskkill_args, &[], None) {
        Ok(output) => output.status.success(),
        // Failing to launch `taskkill` is reported the same way as a
        // non-success exit status.
        Err(_) => false,
    }
}
121
122#[cfg(unix)]
127pub(crate) fn kill_process(
128 pid: u32,
129 executor: &dyn ProcessExecutor,
130 child: Option<&Arc<Mutex<Box<dyn AgentChild>>>>,
131 config: KillConfig,
132) -> KillResult {
133 let pid_str = pid.to_string();
134 let pgid_str = format!("-{pid_str}");
135
136 let term_ok = executor
138 .execute("kill", &["-TERM", "--", &pgid_str], &[], None)
139 .map(|o| o.status.success())
140 .unwrap_or(false)
141 || executor
142 .execute("kill", &["-TERM", &pid_str], &[], None)
143 .map(|o| o.status.success())
144 .unwrap_or(false);
145
146 if !term_ok {
147 return KillResult::Failed;
148 }
149
150 if let Some(child_arc) = child {
151 let grace_deadline = std::time::Instant::now() + config.sigterm_grace;
152 while std::time::Instant::now() < grace_deadline {
153 let status = {
154 let mut locked_child = child_arc
155 .lock()
156 .expect("child process mutex poisoned - indicates panic in another thread");
157 locked_child.try_wait()
158 };
159
160 match status {
161 Ok(Some(_)) => return KillResult::TerminatedByTerm,
162 Ok(None) => std::thread::sleep(config.poll_interval),
163 Err(_) => std::thread::sleep(config.poll_interval),
164 }
165 }
166
167 let kill_ok = executor
168 .execute("kill", &["-KILL", "--", &pgid_str], &[], None)
169 .map(|o| o.status.success())
170 .unwrap_or(false)
171 || executor
172 .execute("kill", &["-KILL", &pid_str], &[], None)
173 .map(|o| o.status.success())
174 .unwrap_or(false);
175 if !kill_ok {
176 return KillResult::Failed;
177 }
178
179 let confirm_deadline = std::time::Instant::now() + config.sigkill_confirm_timeout;
180 while std::time::Instant::now() < confirm_deadline {
181 let status = {
182 let mut locked_child = child_arc
183 .lock()
184 .expect("child process mutex poisoned - indicates panic in another thread");
185 locked_child.try_wait()
186 };
187
188 match status {
189 Ok(Some(_)) => return KillResult::TerminatedByKill,
190 Ok(None) => std::thread::sleep(config.poll_interval),
191 Err(_) => std::thread::sleep(config.poll_interval),
192 }
193 }
194
195 return KillResult::SignalsSentAwaitingExit { escalated: true };
196 }
197
198 KillResult::TerminatedByTerm
199}
200
/// Forcefully terminates `pid` and its child processes with
/// `taskkill /F /T /PID <pid>`, then waits for the exit to be observed.
///
/// * Returns `KillResult::Failed` if `taskkill` could not be run or reported a
///   non-success status.
/// * If no `child` handle was supplied the exit cannot be observed, so
///   `KillResult::TerminatedByKill` is returned right away.
/// * Otherwise `child` is polled for up to `config.sigkill_confirm_timeout`:
///   an observed exit yields `KillResult::TerminatedByKill`, otherwise
///   `KillResult::SignalsSentAwaitingExit { escalated: true }`.
///
/// The polling phase checks the child at least once, never sleeps past its
/// deadline, and performs a final check when the deadline is reached, so an
/// exit that happens during the last sleep is still detected.
///
/// # Panics
///
/// Panics if the `child` mutex is poisoned.
#[cfg(windows)]
pub(crate) fn kill_process(
    pid: u32,
    executor: &dyn ProcessExecutor,
    child: Option<&Arc<Mutex<Box<dyn AgentChild>>>>,
    config: KillConfig,
) -> KillResult {
    // Polls `child` until it reports an exit status or `timeout` elapses.
    // Returns `true` only when an exit was observed.
    fn exited_within(
        child: &Arc<Mutex<Box<dyn AgentChild>>>,
        timeout: Duration,
        poll_interval: Duration,
    ) -> bool {
        let deadline = std::time::Instant::now() + timeout;
        loop {
            // Hold the lock only for the duration of the `try_wait` call.
            let status = {
                let mut locked_child = child
                    .lock()
                    .expect("child process mutex poisoned - indicates panic in another thread");
                locked_child.try_wait()
            };

            // `Ok(None)` (still running) and `Err(_)` are both treated as
            // "not exited yet".
            if matches!(status, Ok(Some(_))) {
                return true;
            }

            let remaining = deadline.saturating_duration_since(std::time::Instant::now());
            if remaining.is_zero() {
                return false;
            }
            // Never sleep past the deadline; the next iteration then performs
            // the final check.
            std::thread::sleep(poll_interval.min(remaining));
        }
    }

    let kill_ok = executor
        .execute(
            "taskkill",
            &["/F", "/T", "/PID", &pid.to_string()],
            &[],
            None,
        )
        .map(|o| o.status.success())
        .unwrap_or(false);
    if !kill_ok {
        return KillResult::Failed;
    }

    let child_arc = match child {
        Some(handle) => handle,
        // Without a handle there is nothing to poll.
        None => return KillResult::TerminatedByKill,
    };

    if exited_within(
        child_arc,
        config.sigkill_confirm_timeout,
        config.poll_interval,
    ) {
        KillResult::TerminatedByKill
    } else {
        KillResult::SignalsSentAwaitingExit { escalated: true }
    }
}