1use super::{
7 AgentChildHandle, AgentSpawnConfig, ChildProcessInfo, ProcessOutput, RealAgentChild,
8 SpawnedProcess,
9};
10#[cfg(target_os = "macos")]
11use crate::executor::macos::child_info_from_libproc;
12use crate::executor::ps::parse_ps_output;
13use crate::executor::{
14 bfs::collect_descendants,
15 command::{build_agent_command_internal, build_command},
16 ps::{
17 child_info_from_descendant_pids, warn_child_process_detection_conservative,
18 warn_child_process_detection_degraded,
19 },
20};
21use std::io;
22use std::path::Path;
23
24pub trait ProcessExecutor: Send + Sync + std::fmt::Debug {
32 fn execute(
49 &self,
50 command: &str,
51 args: &[&str],
52 env: &[(String, String)],
53 workdir: Option<&Path>,
54 ) -> io::Result<ProcessOutput>;
55
56 fn spawn(
77 &self,
78 command: &str,
79 args: &[&str],
80 env: &[(String, String)],
81 workdir: Option<&Path>,
82 ) -> io::Result<SpawnedProcess> {
83 let mut child = build_command(command, args, env, workdir)
84 .stdin(std::process::Stdio::piped())
85 .stdout(std::process::Stdio::piped())
86 .stderr(std::process::Stdio::piped())
87 .spawn()?;
88 let stdin = child.stdin.take();
89 Ok(SpawnedProcess {
90 stdin,
91 inner: child,
92 })
93 }
94
95 fn spawn_agent(&self, config: &AgentSpawnConfig) -> io::Result<AgentChildHandle> {
119 let child = build_agent_command_internal(
120 &config.command,
121 &config.args,
122 &config.env,
123 &config.prompt,
124 )
125 .stdin(std::process::Stdio::null())
126 .stdout(std::process::Stdio::piped())
127 .stderr(std::process::Stdio::piped())
128 .spawn()?;
129 wrap_agent_child(child)
130 }
131
132 fn command_exists(&self, command: &str) -> bool {
145 match self.execute(command, &[], &[], None) {
146 Ok(output) => output.status.success(),
147 Err(_) => false,
148 }
149 }
150
151 fn get_child_process_info(&self, parent_pid: u32) -> ChildProcessInfo {
165 #[cfg(unix)]
166 return get_child_process_info_unix(self, parent_pid);
167 #[cfg(not(unix))]
168 {
169 let _ = parent_pid;
170 ChildProcessInfo::NONE
171 }
172 }
173}
174
/// Argument vectors tried against `ps`, ordered from the richest column set to the leanest.
///
/// Each column set appears twice: once with the `-ax` selector and once with `-e`.
/// The tiers are:
/// 1. pid, ppid, pgid, stat, cputime, comm
/// 2. pid, ppid, pgid, stat, cputime
/// 3. pid, ppid, cputime
///
/// Every column is requested as `name=`, i.e. with an empty header label.
const PS_ATTEMPTS: [&[&str]; 6] = {
    const FULL_AX: &[&str] = &[
        "-ax", "-o", "pid=", "-o", "ppid=", "-o", "pgid=", "-o", "stat=", "-o", "cputime=", "-o",
        "comm=",
    ];
    const FULL_E: &[&str] = &[
        "-e", "-o", "pid=", "-o", "ppid=", "-o", "pgid=", "-o", "stat=", "-o", "cputime=", "-o",
        "comm=",
    ];
    const STATE_AX: &[&str] = &[
        "-ax", "-o", "pid=", "-o", "ppid=", "-o", "pgid=", "-o", "stat=", "-o", "cputime=",
    ];
    const STATE_E: &[&str] = &[
        "-e", "-o", "pid=", "-o", "ppid=", "-o", "pgid=", "-o", "stat=", "-o", "cputime=",
    ];
    const LEGACY_AX: &[&str] = &["-ax", "-o", "pid=", "-o", "ppid=", "-o", "cputime="];
    const LEGACY_E: &[&str] = &["-e", "-o", "pid=", "-o", "ppid=", "-o", "cputime="];

    [FULL_AX, FULL_E, STATE_AX, STATE_E, LEGACY_AX, LEGACY_E]
};
193
194fn try_ps_args<E: ProcessExecutor + ?Sized>(
195 executor: &E,
196 args: &[&str],
197 parent_pid: u32,
198) -> Option<ChildProcessInfo> {
199 let out = executor.execute("ps", args, &[], None).ok()?;
200 out.status
201 .success()
202 .then(|| parse_ps_output(&out.stdout, parent_pid))
203 .flatten()
204}
205
206fn try_ps_output_chain<E: ProcessExecutor + ?Sized>(
207 executor: &E,
208 parent_pid: u32,
209) -> Option<ChildProcessInfo> {
210 PS_ATTEMPTS
211 .iter()
212 .find_map(|&args| try_ps_args(executor, args, parent_pid))
213}
214
215#[cfg(unix)]
216fn try_libproc_fallback(parent_pid: u32) -> Option<ChildProcessInfo> {
217 #[cfg(target_os = "macos")]
218 return child_info_from_libproc(parent_pid);
219 #[cfg(not(target_os = "macos"))]
220 {
221 let _ = parent_pid;
222 None
223 }
224}
225
226#[cfg(unix)]
227fn get_child_process_info_unix<E: ProcessExecutor + ?Sized>(
228 executor: &E,
229 parent_pid: u32,
230) -> ChildProcessInfo {
231 try_ps_output_chain(executor, parent_pid)
232 .or_else(|| try_libproc_fallback(parent_pid))
233 .or_else(|| try_pgrep_fallback(executor, parent_pid))
234 .unwrap_or_else(|| {
235 warn_child_process_detection_degraded();
236 ChildProcessInfo::NONE
237 })
238}
239
240fn try_pgrep_fallback<E: ProcessExecutor + ?Sized>(
241 executor: &E,
242 parent_pid: u32,
243) -> Option<ChildProcessInfo> {
244 let descendants = collect_descendants(executor, parent_pid);
245 if !descendants.is_empty() {
246 warn_child_process_detection_conservative();
247 return Some(child_info_from_descendant_pids(&descendants));
248 }
249 None
250}
251
252impl SpawnedProcess {
253 pub fn wait(&mut self) -> io::Result<()> {
259 self.inner.wait()?;
260 Ok(())
261 }
262
263 pub fn try_wait(&mut self) -> io::Result<Option<std::process::ExitStatus>> {
269 self.inner.try_wait()
270 }
271
272 pub fn kill(&mut self) -> io::Result<()> {
278 self.inner.kill()
279 }
280}
281
282fn wrap_agent_child(mut child: std::process::Child) -> io::Result<AgentChildHandle> {
283 let stdout = child
284 .stdout
285 .take()
286 .ok_or_else(|| io::Error::other("Failed to capture stdout"))?;
287 let stderr = child
288 .stderr
289 .take()
290 .ok_or_else(|| io::Error::other("Failed to capture stderr"))?;
291 Ok(AgentChildHandle {
292 stdout: Box::new(stdout),
293 stderr: Box::new(stderr),
294 inner: Box::new(RealAgentChild(child)),
295 })
296}
297
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    /// Identifies one canned invocation: program name plus its exact argument list.
    #[cfg(unix)]
    type InvocationKey = (String, Vec<String>);

    /// Canned responses served by [`TestExecutor`].
    #[cfg(unix)]
    type ResultMap = HashMap<InvocationKey, ProcessOutput>;

    /// Builds a successful [`ProcessOutput`] (raw wait status 0) with `stdout` and no stderr.
    #[cfg(unix)]
    fn ok_output(stdout: &str) -> ProcessOutput {
        use std::os::unix::process::ExitStatusExt;

        let status = std::process::ExitStatus::from_raw(0);
        ProcessOutput {
            status,
            stdout: stdout.to_string(),
            stderr: String::new(),
        }
    }

    /// Executor double that answers `execute` from a fixed table and fails on anything else.
    #[cfg(unix)]
    #[derive(Debug)]
    struct TestExecutor {
        results: ResultMap,
    }

    #[cfg(unix)]
    impl TestExecutor {
        fn new(results: ResultMap) -> Self {
            Self { results }
        }
    }

    #[cfg(unix)]
    impl ProcessExecutor for TestExecutor {
        /// Looks up `(command, args)` in the table; environment and workdir are ignored.
        fn execute(
            &self,
            command: &str,
            args: &[&str],
            _env: &[(String, String)],
            _workdir: Option<&std::path::Path>,
        ) -> std::io::Result<ProcessOutput> {
            let lookup = invocation(command, args);
            match self.results.get(&lookup) {
                Some(canned) => Ok(canned.clone()),
                None => Err(std::io::Error::other("unexpected execute")),
            }
        }
    }

    /// Turns a borrowed command line into an owned [`InvocationKey`].
    #[cfg(unix)]
    fn invocation(command: &str, args: &[&str]) -> InvocationKey {
        let owned_args = args.iter().map(|arg| (*arg).to_string()).collect();
        (command.to_string(), owned_args)
    }

    /// Key for the leanest `ps -ax` attempt: pid, ppid and cputime only.
    #[cfg(unix)]
    fn ps_key() -> InvocationKey {
        invocation(
            "ps",
            &["-ax", "-o", "pid=", "-o", "ppid=", "-o", "cputime="],
        )
    }

    /// Key for the `ps -ax` attempt that adds pgid and stat columns.
    #[cfg(unix)]
    fn ps_key_with_state_and_group() -> InvocationKey {
        invocation(
            "ps",
            &[
                "-ax", "-o", "pid=", "-o", "ppid=", "-o", "pgid=", "-o", "stat=", "-o",
                "cputime=",
            ],
        )
    }

    /// Key for the richest `ps -ax` attempt: pgid, stat and comm columns included.
    #[cfg(unix)]
    fn ps_key_with_state_group_and_command() -> InvocationKey {
        invocation(
            "ps",
            &[
                "-ax", "-o", "pid=", "-o", "ppid=", "-o", "pgid=", "-o", "stat=", "-o",
                "cputime=", "-o", "comm=",
            ],
        )
    }

    /// Key for `pgrep -P <parent_pid>`.
    #[cfg(unix)]
    fn pgrep_key(parent_pid: u32) -> InvocationKey {
        let pid_text = parent_pid.to_string();
        invocation("pgrep", &["-P", pid_text.as_str()])
    }

    /// Serves `canned` from a fresh [`TestExecutor`] and queries it for `parent_pid`.
    #[cfg(unix)]
    fn child_info_for(
        parent_pid: u32,
        canned: Vec<(InvocationKey, ProcessOutput)>,
    ) -> ChildProcessInfo {
        let results: ResultMap = canned.into_iter().collect();
        TestExecutor::new(results).get_child_process_info(parent_pid)
    }

    #[cfg(unix)]
    #[test]
    fn get_child_process_info_legacy_ps_output_is_conservative_about_current_activity() {
        let info = child_info_for(
            4242,
            vec![(
                ps_key(),
                ok_output("12345 4242 0:01.50\n12346 4242 0:03.00\n99999 1 0:10.00\n"),
            )],
        );

        assert_eq!(info.child_count, 2, "should find 2 children of pid 4242");
        assert_eq!(
            info.active_child_count, 0,
            "legacy ps output without state or process-group columns must not report current activity"
        );
        assert_eq!(
            info.cpu_time_ms,
            1500 + 3000,
            "should sum CPU times of both children"
        );
        assert!(info.has_children());
    }

    #[cfg(unix)]
    #[test]
    fn get_child_process_info_no_children_returns_zero() {
        let info = child_info_for(4242, vec![(ps_key(), ok_output("99999 1 0:10.00\n"))]);

        assert_eq!(info.child_count, 0);
        assert_eq!(info.active_child_count, 0);
        assert_eq!(info.cpu_time_ms, 0);
        assert!(!info.has_children());
    }

    #[cfg(unix)]
    #[test]
    fn parse_cputime_formats() {
        let info = child_info_for(100, vec![(ps_key(), ok_output("200 100 01:02:03\n"))]);

        assert_eq!(
            info.cpu_time_ms,
            (3600 + 2 * 60 + 3) * 1000,
            "HH:MM:SS should parse to correct ms"
        );
    }

    #[cfg(unix)]
    #[test]
    fn parse_cputime_with_day_prefix() {
        let info = child_info_for(100, vec![(ps_key(), ok_output("200 100 1-02:03:04\n"))]);

        assert_eq!(
            info.cpu_time_ms,
            ((24 + 2) * 3600 + 3 * 60 + 4) * 1000,
            "DD-HH:MM:SS should parse to correct ms"
        );
    }

    #[cfg(unix)]
    #[test]
    fn get_child_process_info_includes_grandchildren() {
        let rows = "200 100 0:01.00\n300 200 0:02.00\n999 1 0:05.00\n";
        let info = child_info_for(100, vec![(ps_key(), ok_output(rows))]);

        assert_eq!(
            info.child_count, 2,
            "should count both child and grandchild"
        );
        assert_eq!(
            info.cpu_time_ms,
            1000 + 2000,
            "should sum CPU of child and grandchild"
        );
    }

    #[cfg(unix)]
    #[test]
    fn get_child_process_info_excludes_unrelated_processes() {
        let rows = "200 100 0:01.00\n300 400 0:02.00\n400 1 0:03.00\n";
        let info = child_info_for(100, vec![(ps_key(), ok_output(rows))]);

        assert_eq!(info.child_count, 1, "should only count PID 200");
        assert_eq!(
            info.active_child_count, 0,
            "legacy ps output without state columns must remain conservative even for related descendants"
        );
        assert_eq!(info.cpu_time_ms, 1000, "should only sum CPU of PID 200");
    }

    #[cfg(unix)]
    #[test]
    fn get_child_process_info_deep_tree() {
        let rows = "200 100 0:01.00\n300 200 0:02.00\n400 300 0:03.00\n";
        let info = child_info_for(100, vec![(ps_key(), ok_output(rows))]);

        assert_eq!(
            info.child_count, 3,
            "should count all 3 levels of descendants"
        );
        assert_eq!(
            info.cpu_time_ms,
            1000 + 2000 + 3000,
            "should sum CPU across all descendants"
        );
    }

    #[cfg(unix)]
    #[test]
    fn get_child_process_info_pgrep_fallback_does_not_report_active_children() {
        // No `ps` entry is registered, so only the pgrep walk can answer.
        let info = child_info_for(
            100,
            vec![
                (pgrep_key(100), ok_output("200\n300\n")),
                (pgrep_key(200), ok_output("400\n")),
                (pgrep_key(300), ok_output("")),
                (pgrep_key(400), ok_output("")),
            ],
        );

        assert_eq!(info.child_count, 3);
        assert_eq!(
            info.active_child_count, 0,
            "fallback without process state or cpu evidence must not report active children"
        );
        assert_eq!(info.cpu_time_ms, 0);
        assert_ne!(
            info.descendant_pid_signature, 0,
            "observable descendants should retain a stable signature even in fallback mode"
        );
    }

    #[cfg(unix)]
    #[test]
    fn get_child_process_info_excludes_descendants_in_other_process_groups() {
        let info = child_info_for(
            100,
            vec![(
                ps_key_with_state_and_group(),
                ok_output(
                    "200 100 100 S 0:01.00\n201 100 201 S 0:05.00\n300 200 100 S 0:02.00\n301 201 201 S 0:09.00\n",
                ),
            )],
        );

        assert_eq!(
            info.child_count, 2,
            "only descendants that remain in the agent process group should qualify"
        );
        assert_eq!(
            info.active_child_count, 0,
            "sleeping same-process-group descendants should remain observable without suppressing timeout"
        );
        assert_eq!(
            info.cpu_time_ms,
            1000 + 2000,
            "detached descendants in a different process group must be excluded"
        );
    }

    #[cfg(unix)]
    #[test]
    fn get_child_process_info_counts_busy_shell_without_descendants_as_current_work() {
        let info = child_info_for(
            100,
            vec![(
                ps_key_with_state_group_and_command(),
                ok_output("200 100 100 R 0:01.00 sh\n"),
            )],
        );

        assert_eq!(info.child_count, 1);
        assert_eq!(
            info.active_child_count, 1,
            "a shell process that is itself running with accumulated CPU must count as current child work even without descendants"
        );
        assert_eq!(info.cpu_time_ms, 1000);
    }

    #[cfg(unix)]
    #[test]
    fn get_child_process_info_keeps_non_wrapper_busy_processes_active() {
        let info = child_info_for(
            100,
            vec![(
                ps_key_with_state_group_and_command(),
                ok_output("200 100 100 R 0:01.00 python3\n"),
            )],
        );

        assert_eq!(info.child_count, 1);
        assert_eq!(
            info.active_child_count, 1,
            "real worker processes must still count as current child work when they are busy"
        );
        assert_eq!(info.cpu_time_ms, 1000);
    }

    #[cfg(unix)]
    #[test]
    fn get_child_process_info_excludes_zombie_descendants() {
        let info = child_info_for(
            100,
            vec![(
                ps_key_with_state_and_group(),
                ok_output("200 100 100 S 0:01.00\n201 100 100 Z 0:05.00\n"),
            )],
        );

        assert_eq!(info.child_count, 1, "zombie descendants must not qualify");
        assert_eq!(info.active_child_count, 0);
        assert_eq!(info.cpu_time_ms, 1000, "zombie cpu time must be ignored");
    }

    #[cfg(unix)]
    #[test]
    fn get_child_process_info_returns_none_when_only_non_qualifying_descendants_exist() {
        let info = child_info_for(
            100,
            vec![(
                ps_key_with_state_and_group(),
                ok_output("200 100 200 S 0:01.00\n300 200 200 S 0:02.00\n"),
            )],
        );

        assert_eq!(
            info,
            ChildProcessInfo::NONE,
            "an empty qualified descendant set must normalize to no active child work"
        );
    }

    #[cfg(unix)]
    #[test]
    fn get_child_process_info_excludes_zero_cpu_descendants_without_activity_evidence() {
        let info = child_info_for(
            100,
            vec![(
                ps_key_with_state_and_group(),
                ok_output("200 100 100 S 0:00.00\n"),
            )],
        );

        assert_eq!(info.child_count, 1);
        assert_eq!(info.active_child_count, 0);
        assert_eq!(info.cpu_time_ms, 0);
    }

    #[cfg(unix)]
    #[test]
    fn get_child_process_info_does_not_count_running_zero_cpu_descendants_as_currently_active() {
        let info = child_info_for(
            100,
            vec![(
                ps_key_with_state_and_group(),
                ok_output("200 100 100 R 0:00.00\n"),
            )],
        );

        assert_eq!(info.child_count, 1);
        assert_eq!(
            info.active_child_count, 0,
            "running descendants with zero accumulated CPU should not yet count as current work"
        );
        assert_eq!(info.cpu_time_ms, 0);
    }

    #[cfg(unix)]
    #[test]
    fn get_child_process_info_excludes_sleeping_descendants_with_only_historical_cpu() {
        let info = child_info_for(
            100,
            vec![(
                ps_key_with_state_and_group(),
                ok_output("200 100 100 S 0:01.00\n300 200 100 S 0:02.00\n"),
            )],
        );

        assert_eq!(info.child_count, 2);
        assert_eq!(info.active_child_count, 0);
        assert_eq!(info.cpu_time_ms, 3000);
    }

    #[cfg(unix)]
    #[test]
    fn get_child_process_info_pgrep_fallback_is_conservative() {
        // No `ps` entry is registered, so only the pgrep walk can answer.
        let info = child_info_for(
            100,
            vec![
                (pgrep_key(100), ok_output("200\n300\n")),
                (pgrep_key(200), ok_output("")),
                (pgrep_key(300), ok_output("")),
            ],
        );

        assert!(info.has_children());
        assert!(
            !info.has_currently_active_children(),
            "fallback without process-state or cpu evidence must not suppress idle timeout"
        );
        assert_eq!(info.cpu_time_ms, 0);
    }

    #[cfg(target_os = "macos")]
    #[test]
    fn child_pid_entry_count_converts_libproc_bytes_to_pid_count() {
        use super::super::macos::child_pid_entry_count;

        // Byte counts are expressed as multiples of one `pid_t`.
        let bytes_per_pid = i32::try_from(std::mem::size_of::<libc::pid_t>())
            .expect("pid_t size should fit in i32");

        assert_eq!(child_pid_entry_count(bytes_per_pid * 3), Some(3));
        assert_eq!(child_pid_entry_count(bytes_per_pid), Some(1));
        assert_eq!(child_pid_entry_count(0), Some(0));
    }
}