1use super::{
7 AgentChildHandle, AgentSpawnConfig, ChildProcessInfo, ProcessOutput, RealAgentChild,
8 SpawnedProcess,
9};
10#[cfg(target_os = "macos")]
11use crate::executor::macos::child_info_from_libproc;
12use crate::executor::ps::parse_ps_output;
13use crate::executor::{
14 bfs::collect_descendants,
15 command::{build_agent_command_internal, build_command},
16 ps::{
17 child_info_from_descendant_pids, warn_child_process_detection_conservative,
18 warn_child_process_detection_degraded,
19 },
20};
21use std::io;
22use std::path::Path;
23
24pub trait ProcessExecutor: Send + Sync + std::fmt::Debug {
32 fn execute(
49 &self,
50 command: &str,
51 args: &[&str],
52 env: &[(String, String)],
53 workdir: Option<&Path>,
54 ) -> io::Result<ProcessOutput>;
55
56 fn spawn(
77 &self,
78 command: &str,
79 args: &[&str],
80 env: &[(String, String)],
81 workdir: Option<&Path>,
82 ) -> io::Result<SpawnedProcess> {
83 let mut child = build_command(command, args, env, workdir)
84 .stdin(std::process::Stdio::piped())
85 .stdout(std::process::Stdio::piped())
86 .stderr(std::process::Stdio::piped())
87 .spawn()?;
88 let stdin = child.stdin.take();
89 Ok(SpawnedProcess {
90 stdin,
91 inner: child,
92 })
93 }
94
95 fn spawn_agent(&self, config: &AgentSpawnConfig) -> io::Result<AgentChildHandle> {
119 let child = build_agent_command_internal(
120 &config.command,
121 &config.args,
122 &config.env,
123 &config.prompt,
124 )
125 .stdin(std::process::Stdio::null())
126 .stdout(std::process::Stdio::piped())
127 .stderr(std::process::Stdio::piped())
128 .spawn()?;
129 wrap_agent_child(child)
130 }
131
132 fn command_exists(&self, command: &str) -> bool {
145 match self.execute(command, &[], &[], None) {
146 Ok(output) => output.status.success(),
147 Err(_) => false,
148 }
149 }
150
151 fn get_child_process_info(&self, parent_pid: u32) -> ChildProcessInfo {
165 #[cfg(unix)]
166 return get_child_process_info_unix(self, parent_pid);
167 #[cfg(not(unix))]
168 {
169 let _ = parent_pid;
170 ChildProcessInfo::NONE
171 }
172 }
173
174 #[cfg(unix)]
184 fn kill_process_group(&self, _pgid: u32) -> io::Result<()> {
185 Ok(())
186 }
187}
188
/// Argument lists passed to `ps`, in the order they are attempted.
///
/// The table goes from the richest column set to the leanest one, and every column set
/// appears twice: once with the `-ax` selector and once with `-e`. Each `-o name=` entry
/// requests one output column (see the `ps` manual for the meaning of the trailing `=`).
const PS_ATTEMPTS: [&[&str]; 6] = [
    // pid, ppid, pgid, stat, cputime and comm.
    &[
        "-ax", "-o", "pid=", "-o", "ppid=", "-o", "pgid=", "-o", "stat=", "-o", "cputime=", "-o",
        "comm=",
    ],
    &[
        "-e", "-o", "pid=", "-o", "ppid=", "-o", "pgid=", "-o", "stat=", "-o", "cputime=", "-o",
        "comm=",
    ],
    // Same columns without comm.
    &[
        "-ax", "-o", "pid=", "-o", "ppid=", "-o", "pgid=", "-o", "stat=", "-o", "cputime=",
    ],
    &[
        "-e", "-o", "pid=", "-o", "ppid=", "-o", "pgid=", "-o", "stat=", "-o", "cputime=",
    ],
    // Leanest form: only pid, ppid and cputime.
    &["-ax", "-o", "pid=", "-o", "ppid=", "-o", "cputime="],
    &["-e", "-o", "pid=", "-o", "ppid=", "-o", "cputime="],
];
207
208fn try_ps_args<E: ProcessExecutor + ?Sized>(
209 executor: &E,
210 args: &[&str],
211 parent_pid: u32,
212) -> Option<ChildProcessInfo> {
213 let out = executor.execute("ps", args, &[], None).ok()?;
214 out.status
215 .success()
216 .then(|| parse_ps_output(&out.stdout, parent_pid))
217 .flatten()
218}
219
220fn try_ps_output_chain<E: ProcessExecutor + ?Sized>(
221 executor: &E,
222 parent_pid: u32,
223) -> Option<ChildProcessInfo> {
224 PS_ATTEMPTS
225 .iter()
226 .find_map(|&args| try_ps_args(executor, args, parent_pid))
227}
228
229#[cfg(unix)]
230fn try_libproc_fallback(parent_pid: u32) -> Option<ChildProcessInfo> {
231 #[cfg(target_os = "macos")]
232 return child_info_from_libproc(parent_pid);
233 #[cfg(not(target_os = "macos"))]
234 {
235 let _ = parent_pid;
236 None
237 }
238}
239
240#[cfg(unix)]
241fn get_child_process_info_unix<E: ProcessExecutor + ?Sized>(
242 executor: &E,
243 parent_pid: u32,
244) -> ChildProcessInfo {
245 try_ps_output_chain(executor, parent_pid)
246 .or_else(|| try_libproc_fallback(parent_pid))
247 .or_else(|| try_pgrep_fallback(executor, parent_pid))
248 .unwrap_or_else(|| {
249 warn_child_process_detection_degraded();
250 ChildProcessInfo::NONE
251 })
252}
253
254fn try_pgrep_fallback<E: ProcessExecutor + ?Sized>(
255 executor: &E,
256 parent_pid: u32,
257) -> Option<ChildProcessInfo> {
258 let descendants = collect_descendants(executor, parent_pid);
259 if !descendants.is_empty() {
260 warn_child_process_detection_conservative();
261 return Some(child_info_from_descendant_pids(&descendants));
262 }
263 None
264}
265
266impl SpawnedProcess {
267 pub fn wait(&mut self) -> io::Result<()> {
273 self.inner.wait()?;
274 Ok(())
275 }
276
277 pub fn try_wait(&mut self) -> io::Result<Option<std::process::ExitStatus>> {
283 self.inner.try_wait()
284 }
285
286 pub fn kill(&mut self) -> io::Result<()> {
292 self.inner.kill()
293 }
294}
295
296fn wrap_agent_child(mut child: std::process::Child) -> io::Result<AgentChildHandle> {
297 let stdout = child
298 .stdout
299 .take()
300 .ok_or_else(|| io::Error::other("Failed to capture stdout"))?;
301 let stderr = child
302 .stderr
303 .take()
304 .ok_or_else(|| io::Error::other("Failed to capture stderr"))?;
305 Ok(AgentChildHandle {
306 stdout: Box::new(stdout),
307 stderr: Box::new(stderr),
308 inner: Box::new(RealAgentChild(child)),
309 })
310}
311
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    /// Identifies one canned invocation: the program name plus its exact argument list.
    #[cfg(unix)]
    type CommandKey = (String, Vec<String>);

    /// Canned outputs served by [`TestExecutor`], indexed by invocation.
    #[cfg(unix)]
    type ResultMap = HashMap<CommandKey, ProcessOutput>;

    /// Builds an output with raw wait status 0, the given stdout and an empty stderr.
    #[cfg(unix)]
    fn ok_output(stdout: &str) -> ProcessOutput {
        use std::os::unix::process::ExitStatusExt;

        let status = std::process::ExitStatus::from_raw(0);
        ProcessOutput {
            status,
            stdout: stdout.to_owned(),
            stderr: String::new(),
        }
    }

    /// Executor that answers only from a fixed table and rejects every other invocation.
    #[cfg(unix)]
    #[derive(Debug)]
    struct TestExecutor {
        results: ResultMap,
    }

    #[cfg(unix)]
    impl TestExecutor {
        fn new(results: ResultMap) -> Self {
            Self { results }
        }
    }

    #[cfg(unix)]
    impl ProcessExecutor for TestExecutor {
        /// Looks up `(command, args)` in the table; `env` and `workdir` are ignored.
        fn execute(
            &self,
            command: &str,
            args: &[&str],
            _env: &[(String, String)],
            _workdir: Option<&std::path::Path>,
        ) -> std::io::Result<ProcessOutput> {
            match self.results.get(&command_key(command, args)) {
                Some(canned) => Ok(canned.clone()),
                None => Err(std::io::Error::other("unexpected execute")),
            }
        }
    }

    /// Converts a borrowed invocation into the owned key type used by [`ResultMap`].
    #[cfg(unix)]
    fn command_key(program: &str, args: &[&str]) -> CommandKey {
        let owned_args: Vec<String> = args.iter().map(|arg| (*arg).to_owned()).collect();
        (program.to_owned(), owned_args)
    }

    /// Key for the leanest `ps` invocation (pid, ppid, cputime).
    #[cfg(unix)]
    fn ps_key() -> CommandKey {
        command_key(
            "ps",
            &["-ax", "-o", "pid=", "-o", "ppid=", "-o", "cputime="],
        )
    }

    /// Key for the `ps` invocation that also reports pgid and stat.
    #[cfg(unix)]
    fn ps_key_with_state_and_group() -> CommandKey {
        command_key(
            "ps",
            &[
                "-ax", "-o", "pid=", "-o", "ppid=", "-o", "pgid=", "-o", "stat=", "-o", "cputime=",
            ],
        )
    }

    /// Key for the richest `ps` invocation (pgid, stat, cputime and comm).
    #[cfg(unix)]
    fn ps_key_with_state_group_and_command() -> CommandKey {
        command_key(
            "ps",
            &[
                "-ax", "-o", "pid=", "-o", "ppid=", "-o", "pgid=", "-o", "stat=", "-o", "cputime=",
                "-o", "comm=",
            ],
        )
    }

    /// Key for `pgrep -P <parent_pid>`.
    #[cfg(unix)]
    fn pgrep_key(parent_pid: u32) -> CommandKey {
        let pid_text = parent_pid.to_string();
        command_key("pgrep", &["-P", pid_text.as_str()])
    }

    /// Runs `get_child_process_info(parent_pid)` against an executor that knows only the
    /// given canned invocations.
    #[cfg(unix)]
    fn child_info_with<I>(parent_pid: u32, canned: I) -> ChildProcessInfo
    where
        I: IntoIterator<Item = (CommandKey, ProcessOutput)>,
    {
        let executor = TestExecutor::new(canned.into_iter().collect());
        executor.get_child_process_info(parent_pid)
    }

    /// Lean `ps` output counts children and CPU time but never reports current activity.
    #[test]
    #[cfg(unix)]
    fn get_child_process_info_legacy_ps_output_is_conservative_about_current_activity() {
        let info = child_info_with(
            4242,
            [(
                ps_key(),
                ok_output("12345 4242 0:01.50\n12346 4242 0:03.00\n99999 1 0:10.00\n"),
            )],
        );

        assert_eq!(info.child_count, 2, "should find 2 children of pid 4242");
        assert_eq!(
            info.active_child_count, 0,
            "legacy ps output without state or process-group columns must not report current activity"
        );
        assert_eq!(
            info.cpu_time_ms,
            1500 + 3000,
            "should sum CPU times of both children"
        );
        assert!(info.has_children());
    }

    /// A process table without any child of the parent yields all-zero info.
    #[test]
    #[cfg(unix)]
    fn get_child_process_info_no_children_returns_zero() {
        let info = child_info_with(4242, [(ps_key(), ok_output("99999 1 0:10.00\n"))]);

        assert_eq!(info.child_count, 0);
        assert_eq!(info.active_child_count, 0);
        assert_eq!(info.cpu_time_ms, 0);
        assert!(!info.has_children());
    }

    /// `HH:MM:SS` CPU times are converted to milliseconds.
    #[test]
    #[cfg(unix)]
    fn parse_cputime_formats() {
        let info = child_info_with(100, [(ps_key(), ok_output("200 100 01:02:03\n"))]);

        assert_eq!(
            info.cpu_time_ms,
            (3600 + 2 * 60 + 3) * 1000,
            "HH:MM:SS should parse to correct ms"
        );
    }

    /// `DD-HH:MM:SS` CPU times are converted to milliseconds.
    #[test]
    #[cfg(unix)]
    fn parse_cputime_with_day_prefix() {
        let info = child_info_with(100, [(ps_key(), ok_output("200 100 1-02:03:04\n"))]);

        assert_eq!(
            info.cpu_time_ms,
            ((24 + 2) * 3600 + 3 * 60 + 4) * 1000,
            "DD-HH:MM:SS should parse to correct ms"
        );
    }

    /// Grandchildren are counted together with direct children.
    #[test]
    #[cfg(unix)]
    fn get_child_process_info_includes_grandchildren() {
        let process_table = "200 100 0:01.00\n300 200 0:02.00\n999 1 0:05.00\n";
        let info = child_info_with(100, [(ps_key(), ok_output(process_table))]);

        assert_eq!(
            info.child_count, 2,
            "should count both child and grandchild"
        );
        assert_eq!(
            info.cpu_time_ms,
            1000 + 2000,
            "should sum CPU of child and grandchild"
        );
    }

    /// Processes outside the parent's subtree are ignored.
    #[test]
    #[cfg(unix)]
    fn get_child_process_info_excludes_unrelated_processes() {
        let process_table = "200 100 0:01.00\n300 400 0:02.00\n400 1 0:03.00\n";
        let info = child_info_with(100, [(ps_key(), ok_output(process_table))]);

        assert_eq!(info.child_count, 1, "should only count PID 200");
        assert_eq!(
            info.active_child_count, 0,
            "legacy ps output without state columns must remain conservative even for related descendants"
        );
        assert_eq!(info.cpu_time_ms, 1000, "should only sum CPU of PID 200");
    }

    /// Descendants three levels deep are all counted.
    #[test]
    #[cfg(unix)]
    fn get_child_process_info_deep_tree() {
        let process_table = "200 100 0:01.00\n300 200 0:02.00\n400 300 0:03.00\n";
        let info = child_info_with(100, [(ps_key(), ok_output(process_table))]);

        assert_eq!(
            info.child_count, 3,
            "should count all 3 levels of descendants"
        );
        assert_eq!(
            info.cpu_time_ms,
            1000 + 2000 + 3000,
            "should sum CPU across all descendants"
        );
    }

    /// With only `pgrep` answers available, descendants are counted but none is active.
    #[test]
    #[cfg(unix)]
    fn get_child_process_info_pgrep_fallback_does_not_report_active_children() {
        let info = child_info_with(
            100,
            [
                (pgrep_key(100), ok_output("200\n300\n")),
                (pgrep_key(200), ok_output("400\n")),
                (pgrep_key(300), ok_output("")),
                (pgrep_key(400), ok_output("")),
            ],
        );

        assert_eq!(info.child_count, 3);
        assert_eq!(
            info.active_child_count, 0,
            "fallback without process state or cpu evidence must not report active children"
        );
        assert_eq!(info.cpu_time_ms, 0);
        assert_ne!(
            info.descendant_pid_signature, 0,
            "observable descendants should retain a stable signature even in fallback mode"
        );
    }

    /// Descendants whose pgid differs from the parent's are left out.
    #[test]
    #[cfg(unix)]
    fn get_child_process_info_excludes_descendants_in_other_process_groups() {
        let info = child_info_with(
            100,
            [(
                ps_key_with_state_and_group(),
                ok_output(
                    "200 100 100 S 0:01.00\n201 100 201 S 0:05.00\n300 200 100 S 0:02.00\n301 201 201 S 0:09.00\n",
                ),
            )],
        );

        assert_eq!(
            info.child_count, 2,
            "only descendants that remain in the agent process group should qualify"
        );
        assert_eq!(
            info.active_child_count, 0,
            "sleeping same-process-group descendants should remain observable without suppressing timeout"
        );
        assert_eq!(
            info.cpu_time_ms,
            1000 + 2000,
            "detached descendants in a different process group must be excluded"
        );
    }

    /// A running `sh` with accumulated CPU counts as active even without descendants.
    #[test]
    #[cfg(unix)]
    fn get_child_process_info_counts_busy_shell_without_descendants_as_current_work() {
        let info = child_info_with(
            100,
            [(
                ps_key_with_state_group_and_command(),
                ok_output("200 100 100 R 0:01.00 sh\n"),
            )],
        );

        assert_eq!(info.child_count, 1);
        assert_eq!(
            info.active_child_count, 1,
            "a shell process that is itself running with accumulated CPU must count as current child work even without descendants"
        );
        assert_eq!(info.cpu_time_ms, 1000);
    }

    /// A running non-shell worker with accumulated CPU counts as active.
    #[test]
    #[cfg(unix)]
    fn get_child_process_info_keeps_non_wrapper_busy_processes_active() {
        let info = child_info_with(
            100,
            [(
                ps_key_with_state_group_and_command(),
                ok_output("200 100 100 R 0:01.00 python3\n"),
            )],
        );

        assert_eq!(info.child_count, 1);
        assert_eq!(
            info.active_child_count, 1,
            "real worker processes must still count as current child work when they are busy"
        );
        assert_eq!(info.cpu_time_ms, 1000);
    }

    /// Zombie (`Z`) descendants contribute neither to the count nor to CPU time.
    #[test]
    #[cfg(unix)]
    fn get_child_process_info_excludes_zombie_descendants() {
        let info = child_info_with(
            100,
            [(
                ps_key_with_state_and_group(),
                ok_output("200 100 100 S 0:01.00\n201 100 100 Z 0:05.00\n"),
            )],
        );

        assert_eq!(info.child_count, 1, "zombie descendants must not qualify");
        assert_eq!(info.active_child_count, 0);
        assert_eq!(info.cpu_time_ms, 1000, "zombie cpu time must be ignored");
    }

    /// When every descendant is disqualified the result equals `ChildProcessInfo::NONE`.
    #[test]
    #[cfg(unix)]
    fn get_child_process_info_returns_none_when_only_non_qualifying_descendants_exist() {
        let info = child_info_with(
            100,
            [(
                ps_key_with_state_and_group(),
                ok_output("200 100 200 S 0:01.00\n300 200 200 S 0:02.00\n"),
            )],
        );

        assert_eq!(
            info,
            ChildProcessInfo::NONE,
            "an empty qualified descendant set must normalize to no active child work"
        );
    }

    /// A sleeping descendant with zero CPU is counted but not active.
    #[test]
    #[cfg(unix)]
    fn get_child_process_info_excludes_zero_cpu_descendants_without_activity_evidence() {
        let info = child_info_with(
            100,
            [(
                ps_key_with_state_and_group(),
                ok_output("200 100 100 S 0:00.00\n"),
            )],
        );

        assert_eq!(info.child_count, 1);
        assert_eq!(info.active_child_count, 0);
        assert_eq!(info.cpu_time_ms, 0);
    }

    /// A running descendant with zero CPU is counted but not yet active.
    #[test]
    #[cfg(unix)]
    fn get_child_process_info_does_not_count_running_zero_cpu_descendants_as_currently_active() {
        let info = child_info_with(
            100,
            [(
                ps_key_with_state_and_group(),
                ok_output("200 100 100 R 0:00.00\n"),
            )],
        );

        assert_eq!(info.child_count, 1);
        assert_eq!(
            info.active_child_count, 0,
            "running descendants with zero accumulated CPU should not yet count as current work"
        );
        assert_eq!(info.cpu_time_ms, 0);
    }

    /// Sleeping descendants with past CPU usage are counted but not active.
    #[test]
    #[cfg(unix)]
    fn get_child_process_info_excludes_sleeping_descendants_with_only_historical_cpu() {
        let info = child_info_with(
            100,
            [(
                ps_key_with_state_and_group(),
                ok_output("200 100 100 S 0:01.00\n300 200 100 S 0:02.00\n"),
            )],
        );

        assert_eq!(info.child_count, 2);
        assert_eq!(info.active_child_count, 0);
        assert_eq!(info.cpu_time_ms, 3000);
    }

    /// The `pgrep` fallback reports children without claiming any of them is active.
    #[test]
    #[cfg(unix)]
    fn get_child_process_info_pgrep_fallback_is_conservative() {
        let info = child_info_with(
            100,
            [
                (pgrep_key(100), ok_output("200\n300\n")),
                (pgrep_key(200), ok_output("")),
                (pgrep_key(300), ok_output("")),
            ],
        );

        assert!(info.has_children());
        assert!(
            !info.has_currently_active_children(),
            "fallback without process-state or cpu evidence must not suppress idle timeout"
        );
        assert_eq!(info.cpu_time_ms, 0);
    }

    /// `child_pid_entry_count` maps a byte count to the number of `pid_t` entries.
    #[test]
    #[cfg(target_os = "macos")]
    fn child_pid_entry_count_converts_libproc_bytes_to_pid_count() {
        use super::super::macos::child_pid_entry_count;

        let bytes_per_pid = std::mem::size_of::<libc::pid_t>();
        let pid_width = i32::try_from(bytes_per_pid).expect("pid_t size should fit in i32");

        assert_eq!(child_pid_entry_count(pid_width * 3), Some(3));
        assert_eq!(child_pid_entry_count(pid_width), Some(1));
        assert_eq!(child_pid_entry_count(0), Some(0));
    }
}