runtimo_core/capabilities/
kill.rs1use crate::capability::{Capability, Context, Output};
29use crate::processes::ProcessSnapshot;
30use crate::{Error, Result};
31use serde::{Deserialize, Serialize};
32use serde_json::Value;
33use std::time::Duration;
34
35#[cfg(test)]
36use std::process::Command;
37
/// Returns the start time of `pid` as recorded in `/proc/<pid>/stat`.
///
/// The value is the 22nd field of the stat line (`starttime` in proc(5)). It
/// does not change while a process is alive, so comparing two readings tells
/// whether a PID still refers to the same process.
///
/// Returns `None` when the file cannot be read (for example because the
/// process does not exist) or when the line does not have the expected layout.
fn get_process_start_time(pid: u32) -> Option<u64> {
    let content = std::fs::read_to_string(format!("/proc/{}/stat", pid)).ok()?;
    // The command name (field 2) is wrapped in parentheses and may itself
    // contain spaces or ')' characters, so fields are counted from the LAST
    // ')'. Splitting there cannot panic on short or truncated content, unlike
    // slicing at a computed byte offset.
    let (_, after_comm) = content.rsplit_once(')')?;
    // The first token after the command name is `state` (field 3), which puts
    // `starttime` (field 22) at zero-based position 19.
    after_comm.split_whitespace().nth(19)?.parse().ok()
}
51fn get_process_start_time_retry(pid: u32) -> Option<u64> {
52 #[allow(clippy::arithmetic_side_effects)] for attempt in 0..3 {
54 if attempt > 0 {
55 std::thread::sleep(std::time::Duration::from_millis(10 * (1 << attempt)));
56 }
57 if let Some(start_time) = get_process_start_time(pid) {
58 return Some(start_time);
59 }
60 }
61 None
62}
63
/// Returns the raw text of `/proc/<pid>/cgroup`.
///
/// Yields `None` whenever the file cannot be read, e.g. because no process
/// with that PID exists.
fn get_process_cgroup(pid: u32) -> Option<String> {
    let cgroup_path = format!("/proc/{}/cgroup", pid);
    match std::fs::read_to_string(cgroup_path) {
        Ok(contents) => Some(contents),
        Err(_) => None,
    }
}
70
/// Reports whether cgroup text (as read from `/proc/<pid>/cgroup`) looks like
/// it belongs to a systemd-managed unit.
///
/// The text matches when it contains any of the substrings `/system.slice/`,
/// `/init.scope` or `systemd`.
///
/// NOTE(review): the bare `systemd` marker is matched anywhere in the text,
/// not only in the cgroup path, so it is considerably broader than the other
/// two markers — confirm that this breadth is intended.
fn is_systemd_service(cgroup: &str) -> bool {
    const MARKERS: [&str; 3] = ["/system.slice/", "/init.scope", "systemd"];
    MARKERS.iter().any(|marker| cgroup.contains(*marker))
}
77
78fn protected_pids() -> Vec<u32> {
82 let mut pids = vec![1, 2];
83 let self_pid = std::process::id();
84 pids.push(self_pid);
85
86 if let Ok(status) = std::fs::read_to_string(format!("/proc/{}/status", self_pid)) {
88 if let Some(ppid_str) = status
89 .lines()
90 .find(|l| l.starts_with("PPid:"))
91 .and_then(|l| l.split_whitespace().nth(1))
92 {
93 if let Ok(ppid) = ppid_str.parse::<u32>() {
94 pids.push(ppid);
95 }
96 }
97 }
98
99 if let Ok(status) = std::fs::read_to_string(format!("/proc/{}/status", self_pid)) {
101 if let Some(sid_str) = status
102 .lines()
103 .find(|l| l.starts_with("Sid:"))
104 .and_then(|l| l.split_whitespace().nth(1))
105 {
106 if let Ok(sid) = sid_str.parse::<u32>() {
107 if sid != 0 {
108 pids.push(sid);
109 }
110 }
111 }
112 }
113
114 if let Ok(status) = std::fs::read_to_string(format!("/proc/{}/status", self_pid)) {
116 if let Some(pgid_str) = status
117 .lines()
118 .find(|l| l.starts_with("NSpgid:"))
119 .and_then(|l| l.split_whitespace().nth(1))
120 {
121 if let Ok(pgid) = pgid_str.parse::<u32>() {
122 if pgid != 0 {
123 pids.push(pgid);
124 }
125 }
126 }
127 }
128
129 if let Ok(entries) = std::fs::read_dir("/proc") {
131 for entry in entries.flatten() {
132 if let Ok(name) = entry.file_name().into_string() {
133 if let Ok(pid) = name.parse::<u32>() {
134 if let Some(cgroup) = get_process_cgroup(pid) {
135 if is_systemd_service(&cgroup) {
136 pids.push(pid);
137 }
138 }
139 }
140 }
141 }
142 }
143
144 pids.sort_unstable();
145 pids.dedup();
146 pids
147}
148
/// Arguments of the `Kill` capability, deserialized from the JSON request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KillArgs {
    /// PID of the process to signal.
    pub pid: u32,
    /// Signal number to send. When absent, `Kill::execute` sends signal 15.
    pub signal: Option<i32>,
}
157
/// Capability that sends a signal to a single process identified by PID.
///
/// It carries no state; all behavior lives in its `Capability` implementation,
/// which refuses PIDs returned by `protected_pids` and honors dry-run mode.
#[allow(clippy::exhaustive_structs)]
pub struct Kill;
164
165impl Capability for Kill {
166 fn name(&self) -> &'static str {
167 "Kill"
168 }
169
170 fn description(&self) -> &'static str {
171 "kill PID. Protected: init,kthreadd,self. Custom sig ok."
172 }
173
174 fn schema(&self) -> Value {
179 serde_json::json!({
180 "type": "object",
181 "properties": {
182 "pid": { "type": "integer", "minimum": 1 },
183 "signal": {
184 "type": "integer",
185 "anyOf": [
186 { "minimum": 1, "maximum": 31 },
187 { "enum": [64] }
188 ]
189 }
190 },
191 "required": ["pid"]
192 })
193 }
194
195 fn validate(&self, args: &Value) -> Result<()> {
196 let args: KillArgs = serde_json::from_value(args.clone())
197 .map_err(|e| Error::SchemaValidationFailed(e.to_string()))?;
198
199 if let Some(signal) = args.signal {
201 if !(1..=31).contains(&signal) && signal != 64 {
202 return Err(Error::SchemaValidationFailed(format!(
203 "Invalid signal {}: must be 1-31 or 64 (POSIX signals)",
204 signal
205 )));
206 }
207 }
208
209 Ok(())
210 }
211
212 fn execute(&self, args: &Value, ctx: &Context) -> Result<Output> {
213 let args: KillArgs = serde_json::from_value(args.clone())
214 .map_err(|e| Error::ExecutionFailed(e.to_string()))?;
215
216 let protected = protected_pids();
218 if protected.contains(&args.pid) {
219 return Err(Error::ExecutionFailed(format!(
220 "PID {} is a protected system process (protected: {:?})",
221 args.pid, protected
222 )));
223 }
224
225 if ctx.dry_run {
227 return Ok(Output {
229 success: true,
230 data: serde_json::json!({
231 "pid": args.pid,
232 "killed": false,
233 "dry_run": true,
234 "signal": args.signal.unwrap_or(15),
235 }),
236 message: Some(format!("DRY RUN: would kill PID {}", args.pid)),
237 });
238 }
239
240 let process_before = ProcessSnapshot::capture();
242 let process_exists = process_before.processes.iter().any(|p| p.pid == args.pid);
243
244 if !process_exists {
245 return Ok(Output {
246 success: false,
247 data: serde_json::json!({
248 "pid": args.pid,
249 "killed": false,
250 "reason": "Process not found"
251 }),
252 message: Some(format!("Process {} not found", args.pid)),
253 });
254 }
255
256 let process_info: Option<(String, String)> = process_before
258 .processes
259 .iter()
260 .find(|p| p.pid == args.pid)
261 .map(|p| (p.command.clone(), p.user.clone()));
262
263 let start_time_before = get_process_start_time_retry(args.pid);
265
266 let start_time_before_confirm = get_process_start_time_retry(args.pid);
269 if start_time_before != start_time_before_confirm {
270 return Ok(Output {
271 success: false,
272 data: serde_json::json!({
273 "pid": args.pid,
274 "killed": false,
275 "reason": "PID reused between safety checks",
276 "pid_reused": true,
277 }),
278 message: Some(format!(
279 "PID {} was reused by a different process (start time changed before kill)",
280 args.pid
281 )),
282 });
283 }
284
285 let signal = args.signal.unwrap_or(15);
287
288 #[allow(clippy::cast_possible_wrap)]
292 let kill_result = unsafe { libc::kill(args.pid as libc::pid_t, signal) };
293 let success = kill_result == 0;
294 let stderr_str = if success {
295 String::new()
296 } else {
297 std::io::Error::last_os_error().to_string()
298 };
299
300 std::thread::sleep(Duration::from_millis(500));
302
303 ProcessSnapshot::clear_cache();
305
306 let process_after = ProcessSnapshot::capture();
308
309 let process_still_exists = process_after
311 .processes
312 .iter()
313 .any(|p| p.pid == args.pid && !p.stat.starts_with('Z'));
314 let pid_reused = match (start_time_before, get_process_start_time_retry(args.pid)) {
316 (Some(before_time), Some(after_time)) => before_time != after_time,
317 (None, _) => false,
318 (Some(_), None) => true,
319 };
320
321 let killed_success = success && !process_still_exists && !pid_reused;
322
323 let message = if killed_success {
324 format!("Killed process {} (signal {})", args.pid, signal)
325 } else if pid_reused {
326 format!(
327 "PID {} was reused by a different process (start time changed)",
328 args.pid
329 )
330 } else if !success {
331 format!("Failed to kill process {}: {}", args.pid, stderr_str)
332 } else {
333 format!("Process {} still exists after signal {}", args.pid, signal)
334 };
335
336 Ok(Output {
337 success: killed_success,
338 data: serde_json::json!({
339 "pid": args.pid,
340 "killed": killed_success,
341 "signal": signal,
342 "command": process_info.as_ref().map(|(cmd, _)| cmd),
343 "user": process_info.as_ref().map(|(_, user)| user),
344 "stderr": if success { String::new() } else { stderr_str },
345 "pid_reused": pid_reused,
346 "process_before": {
347 "count": process_before.summary.total_processes,
348 "zombies": process_before.summary.zombie_count
349 },
350 "process_after": {
351 "count": process_after.summary.total_processes,
352 "zombies": process_after.summary.zombie_count
353 }
354 }),
355 message: Some(message),
356 })
357 }
358}
359
#[cfg(test)]
#[allow(clippy::unnecessary_map_or)]
mod tests {
    use super::*;
    use crate::capability::Capability;
    use std::thread;
    use std::time::Duration;

    /// Builds the execution context shared by all tests; only the dry-run
    /// flag varies between them.
    fn test_context(dry_run: bool) -> Context {
        Context {
            dry_run,
            job_id: "test".into(),
            working_dir: std::env::current_dir().unwrap(),
        }
    }

    /// Spawns a long-running child process to act as a kill target.
    fn spawn_sleeper() -> std::process::Child {
        Command::new("sleep").arg("60").spawn().unwrap()
    }

    /// Returns whether `kill -0 <pid>` succeeds, i.e. the PID can be signalled.
    fn can_signal(pid: u32) -> bool {
        Command::new("kill")
            .arg("-0")
            .arg(pid.to_string())
            .output()
            .map_or(false, |o| o.status.success())
    }

    #[test]
    fn test_kill_schema() {
        let schema = Kill.schema();

        assert_eq!(schema["type"].as_str(), Some("object"));
        assert_eq!(schema["required"], serde_json::json!(["pid"]));
        assert_eq!(schema["properties"]["pid"]["type"].as_str(), Some("integer"));
        assert_eq!(schema["properties"]["pid"]["minimum"].as_i64(), Some(1));
        assert_eq!(
            schema["properties"]["signal"]["type"].as_str(),
            Some("integer")
        );
    }

    #[test]
    fn test_kill_protected_pid() {
        let result = Kill.execute(&serde_json::json!({ "pid": 1 }), &test_context(false));

        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("protected system process"));
    }

    #[test]
    fn test_kill_self_protected() {
        let self_pid = std::process::id();
        let result = Kill.execute(
            &serde_json::json!({ "pid": self_pid }),
            &test_context(false),
        );

        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("protected"));
    }

    #[test]
    fn test_kill_nonexistent() {
        let result = Kill
            .execute(&serde_json::json!({ "pid": 999999 }), &test_context(false))
            .unwrap();

        assert!(!result.success);
        assert_eq!(result.data["killed"].as_bool(), Some(false));
    }

    #[test]
    fn test_kill_dry_run() {
        let result = Kill
            .execute(&serde_json::json!({ "pid": 999998 }), &test_context(true))
            .unwrap();

        assert!(result.success);
        assert_eq!(result.data["dry_run"].as_bool(), Some(true));
        assert_eq!(result.data["killed"].as_bool(), Some(false));
    }

    #[test]
    fn test_kill_actual_process() {
        let mut child = spawn_sleeper();
        let pid = child.id();

        // Let the child get going before probing it.
        thread::sleep(Duration::from_millis(100));

        assert!(can_signal(pid), "Process should exist before kill");

        // In some environments every process lands in the protected set; the
        // capability then refuses to act and there is nothing to verify.
        let protected = protected_pids();
        if protected.contains(&pid) {
            let _ = child.kill();
            let _ = child.wait();
            eprintln!(
                "SKIP: spawned child PID {pid} is in protected_pids set \
                 ({protected:?}); kill blocked by safety guard. \
                 This is expected in CI containers."
            );
            return;
        }

        // Make sure the capability sees the freshly spawned child.
        ProcessSnapshot::clear_cache();

        let result = Kill
            .execute(
                &serde_json::json!({ "pid": pid, "signal": 9 }),
                &test_context(false),
            )
            .unwrap();

        assert_eq!(
            result.data["killed"].as_bool(),
            Some(true),
            "Kill failed: {:?}",
            result.data
        );
        assert_eq!(
            result.data["signal"].as_i64(),
            Some(9),
            "Should use SIGKILL"
        );

        // Reap the child so the PID is really released before the final probe.
        let _ = child.wait();

        assert!(
            !can_signal(pid),
            "Process {} should be dead after kill and reap",
            pid
        );
    }

    #[test]
    fn test_get_process_start_time() {
        let mut child = spawn_sleeper();
        let pid = child.id();

        let start_time = get_process_start_time(pid);
        assert!(
            start_time.is_some(),
            "Should be able to read start time for running process"
        );

        let start_time2 = get_process_start_time(pid);
        assert_eq!(start_time, start_time2, "Start time should be stable");

        child.kill().ok();
        let _ = child.wait();
    }

    #[test]
    fn test_get_process_start_time_nonexistent() {
        let result = get_process_start_time(999999);
        assert!(result.is_none(), "Non-existent PID should return None");
    }

    #[test]
    fn test_signal_validation_rejects_negative() {
        let result = Kill.validate(&serde_json::json!({ "pid": 999998, "signal": -1 }));
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("Invalid signal"));
    }

    #[test]
    fn test_signal_validation_rejects_zero() {
        let result = Kill.validate(&serde_json::json!({ "pid": 999998, "signal": 0 }));
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("Invalid signal"));
    }

    #[test]
    fn test_signal_validation_rejects_out_of_range() {
        let result = Kill.validate(&serde_json::json!({ "pid": 999998, "signal": 32 }));
        assert!(result.is_err());
    }

    #[test]
    fn test_signal_validation_accepts_valid_signals() {
        for sig in [1, 9, 15, 31, 64] {
            let result = Kill.validate(&serde_json::json!({ "pid": 999998, "signal": sig }));
            assert!(result.is_ok(), "Signal {} should be valid", sig);
        }
    }

    #[test]
    fn test_dry_run_hides_process_info() {
        let result = Kill
            .execute(&serde_json::json!({ "pid": 999998 }), &test_context(true))
            .unwrap();

        assert!(result.success);
        assert_eq!(result.data["dry_run"].as_bool(), Some(true));
        assert!(
            result.data.get("command").is_none(),
            "dry-run must not expose command"
        );
        assert!(
            result.data.get("user").is_none(),
            "dry-run must not expose user"
        );
        assert!(
            result.data.get("process_exists").is_none(),
            "dry-run must not expose process_exists"
        );
    }

    #[test]
    fn test_protected_pids_includes_self_and_parent() {
        let protected = protected_pids();
        let self_pid = std::process::id();
        let parent_pid = std::os::unix::process::parent_id();

        assert!(protected.contains(&1), "PID 1 should be protected");
        assert!(protected.contains(&2), "PID 2 should be protected");
        assert!(
            protected.contains(&self_pid),
            "self PID should be protected"
        );
        assert!(
            protected.contains(&parent_pid),
            "parent PID should be protected"
        );
    }

    #[test]
    fn test_get_process_start_time_retry() {
        let mut child = spawn_sleeper();
        let pid = child.id();

        let result = get_process_start_time_retry(pid);
        assert!(
            result.is_some(),
            "Should read start time for running process"
        );

        child.kill().ok();
        let _ = child.wait();

        let result = get_process_start_time_retry(999999);
        assert!(result.is_none(), "Non-existent PID should return None");
    }
}