ralph_workflow/pipeline/idle_timeout/
kill.rs1use crate::executor::{AgentChild, ProcessExecutor};
4use std::sync::{Arc, Mutex};
5use std::time::Duration;
6
/// Outcome reported by `kill_process` after trying to stop an agent process.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum KillResult {
    /// The process was observed to exit during the SIGTERM grace window, or
    /// SIGTERM was delivered and no child handle was supplied to confirm exit.
    TerminatedByTerm,
    /// The process was observed to exit after the forceful kill (SIGKILL on
    /// Unix, `taskkill /F` on Windows), or the forceful kill was delivered on
    /// Windows and no child handle was supplied to confirm exit.
    TerminatedByKill,
    /// The kill command was delivered but the process had not been observed to
    /// exit when the confirmation window elapsed.
    SignalsSentAwaitingExit {
        /// Every construction in this file sets this to `true`; presumably it
        /// records that the forceful kill step was reached — confirm with callers.
        escalated: bool,
    },
    /// The kill command could not be run or reported a non-success status.
    Failed,
}
24
/// Timing parameters that govern how an agent process is terminated.
///
/// Values are fixed at construction and read back through the accessor
/// methods. Both the constructor and the accessors are `const fn`, so a
/// `const` configuration can be built and inspected at compile time.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct KillConfig {
    /// How long to poll for exit after SIGTERM before escalating to SIGKILL.
    sigterm_grace: Duration,
    /// Sleep between successive exit polls of the child handle.
    poll_interval: Duration,
    /// How long to poll for exit after the forceful kill has been delivered.
    sigkill_confirm_timeout: Duration,
    /// Not read in the visible part of this file; presumably an upper bound on
    /// waiting after SIGKILL — confirm against its consumer.
    post_sigkill_hard_cap: Duration,
    /// Not read in the visible part of this file; presumably the spacing
    /// between repeated SIGKILL attempts — confirm against its consumer.
    sigkill_resend_interval: Duration,
}

impl KillConfig {
    /// Builds a configuration from its five durations, in field order.
    pub const fn new(
        sigterm_grace: Duration,
        poll_interval: Duration,
        sigkill_confirm_timeout: Duration,
        post_sigkill_hard_cap: Duration,
        sigkill_resend_interval: Duration,
    ) -> Self {
        Self {
            sigterm_grace,
            poll_interval,
            sigkill_confirm_timeout,
            post_sigkill_hard_cap,
            sigkill_resend_interval,
        }
    }

    /// Returns the SIGTERM grace window.
    pub const fn sigterm_grace(&self) -> Duration {
        self.sigterm_grace
    }

    /// Returns the interval slept between exit polls.
    pub const fn poll_interval(&self) -> Duration {
        self.poll_interval
    }

    /// Returns the window polled for exit after the forceful kill.
    pub const fn sigkill_confirm_timeout(&self) -> Duration {
        self.sigkill_confirm_timeout
    }

    /// Returns the configured post-SIGKILL hard cap.
    pub const fn post_sigkill_hard_cap(&self) -> Duration {
        self.post_sigkill_hard_cap
    }

    /// Returns the configured SIGKILL resend interval.
    pub const fn sigkill_resend_interval(&self) -> Duration {
        self.sigkill_resend_interval
    }
}
71
72pub const DEFAULT_KILL_CONFIG: KillConfig = KillConfig::new(
80 Duration::from_secs(5),
81 Duration::from_millis(100),
82 Duration::from_millis(500),
83 Duration::from_secs(5),
84 Duration::from_secs(1),
85);
86
87#[cfg(unix)]
88pub(crate) fn force_kill_best_effort(pid: u32, executor: &dyn ProcessExecutor) -> bool {
89 let pid_str = pid.to_string();
90 let pgid_str = format!("-{pid_str}");
91
92 let group_ok = executor
95 .execute("kill", &["-KILL", "--", &pgid_str], &[], None)
96 .map(|o| o.status.success())
97 .unwrap_or(false);
98
99 if group_ok {
100 return true;
101 }
102
103 executor
104 .execute("kill", &["-KILL", &pid_str], &[], None)
105 .map(|o| o.status.success())
106 .unwrap_or(false)
107}
108
/// Forcefully terminates `pid` and its child processes via
/// `taskkill /F /T /PID <pid>`.
///
/// Returns `true` when the command ran and reported success, `false` when it
/// could not be run or reported failure. Exit of the process is not awaited.
#[cfg(windows)]
pub(crate) fn force_kill_best_effort(pid: u32, executor: &dyn ProcessExecutor) -> bool {
    let pid_arg = pid.to_string();
    let taskkill_args = ["/F", "/T", "/PID", pid_arg.as_str()];

    match executor.execute("taskkill", &taskkill_args, &[], None) {
        Ok(output) => output.status.success(),
        // Failing to launch taskkill is reported the same as an unsuccessful kill.
        Err(_) => false,
    }
}
121
122#[cfg(unix)]
127pub(crate) fn kill_process(
128 pid: u32,
129 executor: &dyn ProcessExecutor,
130 child: Option<&Arc<Mutex<Box<dyn AgentChild>>>>,
131 config: KillConfig,
132) -> KillResult {
133 let pid_str = pid.to_string();
134 let pgid_str = format!("-{pid_str}");
135
136 let term_ok = executor
138 .execute("kill", &["-TERM", "--", &pgid_str], &[], None)
139 .map(|o| o.status.success())
140 .unwrap_or(false)
141 || executor
142 .execute("kill", &["-TERM", &pid_str], &[], None)
143 .map(|o| o.status.success())
144 .unwrap_or(false);
145
146 if !term_ok {
147 return KillResult::Failed;
148 }
149
150 if let Some(child_arc) = child {
151 let grace_deadline = std::time::Instant::now() + config.sigterm_grace;
152 while std::time::Instant::now() < grace_deadline {
153 let status = {
154 let mut locked_child = child_arc.lock().unwrap();
155 locked_child.try_wait()
156 };
157
158 match status {
159 Ok(Some(_)) => return KillResult::TerminatedByTerm,
160 Ok(None) => std::thread::sleep(config.poll_interval),
161 Err(_) => std::thread::sleep(config.poll_interval),
162 }
163 }
164
165 let kill_ok = executor
166 .execute("kill", &["-KILL", "--", &pgid_str], &[], None)
167 .map(|o| o.status.success())
168 .unwrap_or(false)
169 || executor
170 .execute("kill", &["-KILL", &pid_str], &[], None)
171 .map(|o| o.status.success())
172 .unwrap_or(false);
173 if !kill_ok {
174 return KillResult::Failed;
175 }
176
177 let confirm_deadline = std::time::Instant::now() + config.sigkill_confirm_timeout;
178 while std::time::Instant::now() < confirm_deadline {
179 let status = {
180 let mut locked_child = child_arc.lock().unwrap();
181 locked_child.try_wait()
182 };
183
184 match status {
185 Ok(Some(_)) => return KillResult::TerminatedByKill,
186 Ok(None) => std::thread::sleep(config.poll_interval),
187 Err(_) => std::thread::sleep(config.poll_interval),
188 }
189 }
190
191 return KillResult::SignalsSentAwaitingExit { escalated: true };
192 }
193
194 KillResult::TerminatedByTerm
195}
196
/// Forcefully terminates `pid` and its child processes, then confirms exit.
///
/// Runs `taskkill /F /T /PID <pid>`; if the command cannot be run or reports
/// failure, returns [`KillResult::Failed`]. Without a `child` handle, exit
/// cannot be observed and [`KillResult::TerminatedByKill`] is returned
/// immediately. Otherwise `child.try_wait()` is polled every
/// `config.poll_interval` for up to `config.sigkill_confirm_timeout`: an
/// observed exit yields [`KillResult::TerminatedByKill`], a timeout yields
/// [`KillResult::SignalsSentAwaitingExit`] with `escalated: true`.
///
/// A poisoned child mutex is recovered rather than unwrapped, so a panic on
/// another thread cannot abort the confirmation loop.
#[cfg(windows)]
pub(crate) fn kill_process(
    pid: u32,
    executor: &dyn ProcessExecutor,
    child: Option<&Arc<Mutex<Box<dyn AgentChild>>>>,
    config: KillConfig,
) -> KillResult {
    let pid_arg = pid.to_string();
    let kill_ok = executor
        .execute("taskkill", &["/F", "/T", "/PID", &pid_arg], &[], None)
        .map(|o| o.status.success())
        .unwrap_or(false);
    if !kill_ok {
        return KillResult::Failed;
    }

    let child_arc = match child {
        Some(child_arc) => child_arc,
        // No handle to poll: report the successful taskkill as the outcome.
        None => return KillResult::TerminatedByKill,
    };

    let confirm_deadline = std::time::Instant::now() + config.sigkill_confirm_timeout;
    while std::time::Instant::now() < confirm_deadline {
        let status = {
            // Recover the guard from a poisoned lock instead of panicking;
            // the guard is dropped before sleeping.
            let mut locked_child = child_arc
                .lock()
                .unwrap_or_else(std::sync::PoisonError::into_inner);
            locked_child.try_wait()
        };

        if matches!(status, Ok(Some(_))) {
            return KillResult::TerminatedByKill;
        }
        // Still running, or the poll itself failed: retry after a pause.
        std::thread::sleep(config.poll_interval);
    }

    KillResult::SignalsSentAwaitingExit { escalated: true }
}