use crate::core::NormalizedPath;
use std::env;
use std::fs;
use std::io::Write;
use std::path::Path;
use std::process::{Child, Command};
use std::time::{Duration, Instant};
use wait_timeout::ChildExt;
/// Global timeout, in seconds, used when no valid override is configured.
pub const DEFAULT_TIMEOUT_SECS: u64 = 300;

/// Resolves the global timeout from the `ZCCACHE_CI_TIMEOUT_SECS` environment variable.
///
/// The value is trimmed before parsing so that a stray space or trailing newline
/// (easy to introduce from shell scripts or CI YAML) does not silently discard the
/// override. Falls back to [`DEFAULT_TIMEOUT_SECS`] when the variable is unset, is
/// not valid Unicode, does not parse as a `u64`, or is zero.
pub fn resolve_timeout() -> Duration {
    let secs = env::var("ZCCACHE_CI_TIMEOUT_SECS")
        .ok()
        .and_then(|raw| raw.trim().parse::<u64>().ok())
        // A zero budget would time out immediately, so treat it as "not configured".
        .filter(|&secs| secs > 0)
        .unwrap_or(DEFAULT_TIMEOUT_SECS);
    Duration::from_secs(secs)
}
/// Result of running one stage through `StageRunner::run`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StageOutcome {
    /// The child exited; carries its exit code, or `-1` when no code was available.
    Exited(i32),
    /// The global deadline had already passed, or passed while waiting for the child.
    GlobalTimeout,
    /// The child could not be spawned, or waiting on it returned an error.
    SpawnFailed,
}
/// Destination for the human-readable progress lines a stage runner emits.
pub trait ProgressSink: Send {
    /// Records or displays one progress line (passed without a trailing newline).
    fn write_line(&mut self, line: &str);
}
/// Progress sink that forwards every line to the process's standard error stream.
pub struct StderrProgress;

impl ProgressSink for StderrProgress {
    /// Writes `line` followed by a newline to stderr; write errors are ignored.
    fn write_line(&mut self, line: &str) {
        let mut err = std::io::stderr();
        let _ = writeln!(err, "{line}");
    }
}
/// Progress sink that keeps every line in memory so it can be inspected later.
#[derive(Default)]
pub struct CapturingProgress {
    /// Lines received so far, in the order they were written.
    pub lines: Vec<String>,
}

impl ProgressSink for CapturingProgress {
    /// Appends an owned copy of `line` to `lines`.
    fn write_line(&mut self, line: &str) {
        let owned = String::from(line);
        self.lines.push(owned);
    }
}
/// Runs a sequence of named stages under one shared deadline, emitting a
/// progress line through `P` each time a stage starts.
pub struct StageRunner<P: ProgressSink = StderrProgress> {
    /// Moment the runner was created; elapsed times are measured from here.
    started: Instant,
    /// Point in time after which stages are reported as timed out.
    deadline: Instant,
    /// Sink that receives the progress marker lines.
    progress: P,
    /// Name of the stage most recently passed to `start_stage`, if any.
    last_stage: Option<String>,
}
impl StageRunner<StderrProgress> {
pub fn new(timeout: Duration) -> Self {
Self::with_progress(timeout, StderrProgress)
}
}
impl<P: ProgressSink> StageRunner<P> {
    /// Creates a runner whose global budget of `timeout` starts counting now and
    /// whose progress markers are written to `progress`.
    ///
    /// A `timeout` so large that `now + timeout` cannot be represented as an
    /// `Instant` is shrunk until it fits instead of panicking; the resulting
    /// deadline is still far beyond any realistic run.
    pub fn with_progress(timeout: Duration, progress: P) -> Self {
        let started = Instant::now();
        // `Instant + Duration` panics on overflow, and the timeout may come from an
        // environment variable. Halve the budget until the sum is representable;
        // this terminates because adding a zero duration always succeeds.
        let mut budget = timeout;
        let deadline = loop {
            if let Some(deadline) = started.checked_add(budget) {
                break deadline;
            }
            budget /= 2;
        };
        Self {
            started,
            deadline,
            progress,
            last_stage: None,
        }
    }

    /// Time elapsed since the runner was created.
    pub fn elapsed(&self) -> Duration {
        self.started.elapsed()
    }

    /// Budget left before the global deadline; zero once the deadline has passed.
    pub fn remaining(&self) -> Duration {
        self.deadline.saturating_duration_since(Instant::now())
    }

    /// Name of the stage most recently started, if any.
    pub fn last_stage(&self) -> Option<&str> {
        self.last_stage.as_deref()
    }

    /// Shared access to the progress sink (e.g. to inspect captured lines).
    pub fn progress_ref(&self) -> &P {
        &self.progress
    }

    /// Emits a `[<elapsed>] -> <stage>` progress marker and records `stage` as the
    /// last stage started.
    pub fn start_stage(&mut self, stage: &str) {
        let elapsed = self.elapsed();
        self.progress
            .write_line(&format!("[{}] -> {}", format_elapsed(elapsed), stage));
        self.last_stage = Some(stage.to_string());
    }

    /// Emits the final `[<elapsed>] done` progress marker.
    pub fn finish(&mut self) {
        let elapsed = self.elapsed();
        self.progress
            .write_line(&format!("[{}] done", format_elapsed(elapsed)));
    }

    /// Runs `cmd` as stage `stage`, waiting at most for the remaining global budget.
    ///
    /// Returns:
    /// * `StageOutcome::Exited(code)` when the child exits in time (`-1` if no exit
    ///   code is available),
    /// * `StageOutcome::GlobalTimeout` when the budget is already exhausted (the
    ///   command is not spawned) or runs out while waiting (diagnostics are
    ///   captured, then the child's process tree is killed),
    /// * `StageOutcome::SpawnFailed` when spawning fails or waiting returns an error.
    pub fn run(&mut self, stage: &str, cmd: &mut Command) -> StageOutcome {
        self.start_stage(stage);
        if self.remaining().is_zero() {
            self.report_timeout(stage, None);
            return StageOutcome::GlobalTimeout;
        }
        // Give the child its own group/session so the whole tree can be killed later.
        configure_process_group(cmd);
        let mut child = match cmd.spawn() {
            Ok(c) => c,
            Err(e) => {
                let _ = writeln!(std::io::stderr(), "{stage}: failed to spawn child: {e}");
                return StageOutcome::SpawnFailed;
            }
        };
        match child.wait_timeout(self.remaining()) {
            Ok(Some(status)) => StageOutcome::Exited(status.code().unwrap_or(-1)),
            Ok(None) => {
                // Capture diagnostics while the hung child is still alive, then kill it.
                self.report_timeout(stage, Some(&child));
                kill_process_tree(&mut child);
                StageOutcome::GlobalTimeout
            }
            Err(e) => {
                let _ = writeln!(std::io::stderr(), "{stage}: wait error: {e}");
                // Same teardown as the timeout path: the child runs in its own
                // group, so killing only the direct child could leak descendants.
                kill_process_tree(&mut child);
                StageOutcome::SpawnFailed
            }
        }
    }

    /// Writes a timeout report for `stage` to stderr and captures diagnostics.
    ///
    /// The report names the hung stage, the last started stage when it differs from
    /// `stage`, and the child's PID when `child` is provided. When called from
    /// `run` the last started stage always equals `stage`, so that line is only
    /// printed for direct callers passing a different stage name.
    pub fn report_timeout(&mut self, stage: &str, child: Option<&Child>) {
        let elapsed = self.elapsed();
        let _ = writeln!(
            std::io::stderr(),
            "STOP-HOOK TIMEOUT after {} - capturing state",
            format_elapsed(elapsed)
        );
        let _ = writeln!(std::io::stderr(), " hung stage: {stage}");
        if let Some(prev) = &self.last_stage {
            if prev != stage {
                let _ = writeln!(std::io::stderr(), " last stage: {prev}");
            }
        }
        if let Some(c) = child {
            let _ = writeln!(std::io::stderr(), " child PID: {}", c.id());
        }
        capture_diagnostics();
    }
}
/// Formats `d` as whole seconds plus one truncated (not rounded) decimal digit,
/// e.g. `12.4s`.
pub fn format_elapsed(d: Duration) -> String {
    let whole_secs = d.as_secs();
    // Tenths are truncated from the sub-second milliseconds.
    let tenths = d.subsec_millis() / 100;
    format!("{}.{}s", whole_secs, tenths)
}
/// Configures `cmd` so that the child it spawns is isolated into its own
/// process group, allowing the whole tree to be terminated later.
///
/// * Unix: registers a `pre_exec` hook that calls `setsid()` in the child; if
///   `setsid()` fails the hook returns the OS error.
/// * Windows: sets the `CREATE_NEW_PROCESS_GROUP` creation flag.
/// * Other platforms: does nothing.
pub fn configure_process_group(cmd: &mut Command) {
    #[cfg(unix)]
    {
        use std::os::unix::process::CommandExt;
        // SAFETY: the hook only calls `setsid()` and, on failure, reads the last OS
        // error. NOTE(review): this relies on `setsid` being async-signal-safe, as
        // `pre_exec` requires — confirm against POSIX.
        unsafe {
            cmd.pre_exec(|| {
                if libc::setsid() == -1 {
                    return Err(std::io::Error::last_os_error());
                }
                Ok(())
            });
        }
    }
    #[cfg(windows)]
    {
        use std::os::windows::process::CommandExt;
        // Win32 process-creation flag value.
        const CREATE_NEW_PROCESS_GROUP: u32 = 0x0000_0200;
        cmd.creation_flags(CREATE_NEW_PROCESS_GROUP);
    }
    #[cfg(not(any(unix, windows)))]
    {
        // Nothing to configure; silence the unused-parameter warning.
        let _ = cmd;
    }
}
/// Forcefully terminates `child` together with its descendants, then reaps it.
///
/// * Windows: runs `taskkill /T /F /PID <pid>` with its output discarded.
/// * Unix: sends `SIGKILL` to the process group whose id is the child's PID
///   (presumably the group created via `configure_process_group` — verify for
///   children spawned any other way).
///
/// On every platform the direct child is then killed and waited on; all errors
/// are ignored because this is best-effort cleanup.
pub fn kill_process_tree(child: &mut Child) {
    let root_pid = child.id();
    #[cfg(windows)]
    {
        use std::process::Stdio;
        let pid_arg = root_pid.to_string();
        let mut taskkill = Command::new("taskkill");
        taskkill
            .arg("/T")
            .arg("/F")
            .arg("/PID")
            .arg(&pid_arg)
            .stdout(Stdio::null())
            .stderr(Stdio::null());
        let _ = taskkill.status();
    }
    #[cfg(unix)]
    {
        let group_id = root_pid as i32;
        // A negative pid argument addresses the whole process group.
        unsafe {
            libc::kill(-group_id, libc::SIGKILL);
        }
    }
    let _ = child.kill();
    let _ = child.wait();
}
pub fn capture_diagnostics() {
eprintln!("--- diagnostics ---");
dump_relevant_processes();
dump_daemon_lock();
dump_zccache_logs();
dump_compile_journal();
eprintln!("--- end diagnostics ---");
}
/// Prints to stderr every running process whose name contains `zccache`,
/// `cargo`, `rustc` or `soldr` (ASCII case-insensitive), with its status, CPU
/// usage and memory, or a placeholder line when none match.
fn dump_relevant_processes() {
    const NEEDLES: [&str; 4] = ["zccache", "cargo", "rustc", "soldr"];

    let mut system = sysinfo::System::new();
    system.refresh_processes(sysinfo::ProcessesToUpdate::All, true);
    eprintln!("processes (zccache/cargo/rustc/soldr):");

    let mut matched = 0usize;
    for (pid, proc_info) in system.processes() {
        let display_name = proc_info.name().to_string_lossy();
        let folded = display_name.to_ascii_lowercase();
        let relevant = NEEDLES.iter().any(|needle| folded.contains(*needle));
        if !relevant {
            continue;
        }
        eprintln!(
            " PID={pid} name={} status={:?} cpu={:.1}% mem={}KB",
            display_name,
            proc_info.status(),
            proc_info.cpu_usage(),
            proc_info.memory() / 1024,
        );
        matched += 1;
    }

    if matched == 0 {
        eprintln!(" (none found)");
    }
}
/// Returns the user's home directory as read from `USERPROFILE` on Windows or
/// `HOME` elsewhere, or `None` when that variable is not set.
fn home_dir() -> Option<NormalizedPath> {
    let var_name = if cfg!(windows) { "USERPROFILE" } else { "HOME" };
    let raw = env::var_os(var_name)?;
    Some(NormalizedPath::new(Path::new(&raw)))
}
/// Prints the contents of `<home>/.zccache/daemon.lock` to stderr, or a note
/// explaining why it could not be read. Does nothing when the home directory is
/// unknown.
fn dump_daemon_lock() {
    let home = match home_dir() {
        Some(dir) => dir,
        None => return,
    };
    let lock = home.join(".zccache").join("daemon.lock");
    eprintln!("daemon.lock ({}):", lock.display());

    let contents = match fs::read_to_string(&lock) {
        Ok(text) => text,
        Err(e) => {
            eprintln!(" (unreadable: {e})");
            return;
        }
    };
    contents.lines().for_each(|line| eprintln!(" {line}"));
}
/// Prints the last `lines` lines of the file at `path` to stderr.
///
/// The file is read as raw bytes and decoded lossily, so a log containing
/// invalid UTF-8 still has its tail shown (with replacement characters) instead
/// of being reported as unreadable. If the file has fewer than `lines` lines all
/// of them are printed; if it cannot be read a single explanatory line is
/// printed instead.
fn dump_tail(path: &Path, lines: usize) {
    let bytes = match fs::read(path) {
        Ok(bytes) => bytes,
        Err(e) => {
            eprintln!(" (unreadable {}: {})", path.display(), e);
            return;
        }
    };
    // Diagnostics should survive stray non-UTF-8 bytes in log output.
    let text = String::from_utf8_lossy(&bytes);
    let collected: Vec<&str> = text.lines().collect();
    let start = collected.len().saturating_sub(lines);
    for line in &collected[start..] {
        eprintln!(" {line}");
    }
}
/// Prints the last 50 lines of every `*.log` file in `<home>/.zccache/logs` to
/// stderr, or a note when the directory cannot be listed or holds no `.log`
/// files. Does nothing when the home directory is unknown.
fn dump_zccache_logs() {
    let home = match home_dir() {
        Some(dir) => dir,
        None => return,
    };
    let log_dir = home.join(".zccache").join("logs");
    eprintln!("zccache logs ({}):", log_dir.display());

    let entries = match fs::read_dir(&log_dir) {
        Ok(listing) => listing,
        Err(e) => {
            eprintln!(" (no log dir: {e})");
            return;
        }
    };

    // Entries that fail to read are skipped; only `.log` extensions are kept.
    let log_files = entries
        .flatten()
        .map(|entry| entry.path())
        .filter(|path| path.extension().map_or(false, |ext| ext == "log"));

    let mut dumped = 0usize;
    for log_file in log_files {
        eprintln!(" --- tail of {} ---", log_file.display());
        dump_tail(&log_file, 50);
        dumped += 1;
    }

    if dumped == 0 {
        eprintln!(" (no .log files)");
    }
}
/// Prints the last 50 lines of
/// `<home>/.soldr/cache/zccache/logs/compile_journal.jsonl` to stderr, or a note
/// when the file is absent. Does nothing when the home directory is unknown.
fn dump_compile_journal() {
    let home = match home_dir() {
        Some(dir) => dir,
        None => return,
    };
    let logs_dir = home
        .join(".soldr")
        .join("cache")
        .join("zccache")
        .join("logs");
    let journal = logs_dir.join("compile_journal.jsonl");
    eprintln!("soldr compile_journal ({}):", journal.display());

    if journal.exists() {
        dump_tail(&journal, 50);
    } else {
        eprintln!(" (not present)");
    }
}
/// How long `kill_daemon` waits for killed daemon processes to exit.
pub const KILL_DAEMON_WAIT: Duration = Duration::from_secs(2);
/// Returns the PIDs of all running processes named exactly `zccache-daemon` or
/// `zccache-daemon.exe`.
fn find_daemon_pids() -> Vec<u32> {
    use sysinfo::ProcessesToUpdate;

    let mut system = sysinfo::System::new();
    system.refresh_processes(ProcessesToUpdate::All, true);

    let mut daemon_pids = Vec::new();
    for (pid, process) in system.processes() {
        let name = process.name().to_string_lossy();
        if name == "zccache-daemon" || name == "zccache-daemon.exe" {
            daemon_pids.push(pid.as_u32());
        }
    }
    daemon_pids
}
/// Force-kills every process in `pids`, then polls every 25 ms until none is
/// alive or `timeout` has elapsed.
///
/// Kill failures are logged to stderr and do not stop the remaining kills. If
/// the timeout expires first, a warning listing the PIDs still alive is printed.
/// Returns immediately for an empty slice.
pub fn kill_pids_and_wait(pids: &[u32], timeout: Duration) {
    if pids.is_empty() {
        return;
    }

    pids.iter().for_each(|pid| {
        if let Err(e) = crate::ipc::force_kill_process(*pid) {
            eprintln!("force_kill_process({pid}) failed: {e}");
        }
    });

    let give_up_at = Instant::now() + timeout;
    let poll_interval = Duration::from_millis(25);

    while pids.iter().any(|&pid| crate::ipc::is_process_alive(pid)) {
        if Instant::now() >= give_up_at {
            // Re-check liveness so the warning lists only the actual survivors.
            let survivors: Vec<u32> = pids
                .iter()
                .copied()
                .filter(|&pid| crate::ipc::is_process_alive(pid))
                .collect();
            eprintln!(
                "Warning: daemon PIDs still alive after {}ms: {:?}",
                timeout.as_millis(),
                survivors
            );
            return;
        }
        std::thread::sleep(poll_interval);
    }
}
/// Kills every running zccache daemon, waiting up to `KILL_DAEMON_WAIT` for
/// them to exit, and then removes the daemon lock file. The lock file is removed
/// even when no daemon was found.
pub fn kill_daemon() {
    let daemon_pids = find_daemon_pids();

    if !daemon_pids.is_empty() {
        daemon_pids.iter().for_each(|pid| {
            eprintln!("Killing running daemon (PID {pid}) to unlock target binaries");
        });
        kill_pids_and_wait(&daemon_pids, KILL_DAEMON_WAIT);
    }

    crate::ipc::remove_lock_file();
}
/// Kills every `zccache-daemon` / `zccache-daemon.exe` process whose parent is
/// unknown or no longer present in the process snapshot.
///
/// Returns the PIDs for which the kill request was reported as successful.
pub fn reap_orphan_daemons() -> Vec<u32> {
    use sysinfo::{Pid, ProcessesToUpdate};

    let mut system = sysinfo::System::new();
    system.refresh_processes(ProcessesToUpdate::All, true);

    // Snapshot of every live PID, used to decide whether a parent still exists.
    let live_pids: std::collections::HashSet<Pid> = system.processes().keys().copied().collect();

    let mut reaped = Vec::new();
    for (pid, process) in system.processes() {
        let name = process.name().to_string_lossy();
        let is_daemon = name == "zccache-daemon" || name == "zccache-daemon.exe";
        if !is_daemon {
            continue;
        }

        let parent = process.parent();
        let parent_alive = parent.map_or(false, |ppid| live_pids.contains(&ppid));
        if parent_alive {
            continue;
        }

        eprintln!("Reaping orphan zccache-daemon PID={pid} (parent {parent:?} gone)");
        if process.kill() {
            reaped.push(pid.as_u32());
        }
    }
    reaped
}
// Unit tests for the stage runner, elapsed formatting and daemon-kill helpers.
// Several tests spawn real child processes.
#[cfg(test)]
mod tests {
    use super::*;
    use std::process::Stdio;
    /// Builds a command that runs for about ten minutes unless killed, with its
    /// stdout and stderr discarded.
    fn sleep_forever_cmd() -> Command {
        if cfg!(windows) {
            let mut c = Command::new("cmd");
            c.args(["/C", "ping -n 600 127.0.0.1 > NUL"]);
            c.stdout(Stdio::null()).stderr(Stdio::null());
            c
        } else {
            let mut c = Command::new("sh");
            c.args(["-c", "sleep 600"]);
            c.stdout(Stdio::null()).stderr(Stdio::null());
            c
        }
    }
    /// Builds a command that exits immediately with status 0.
    fn quick_exit_cmd() -> Command {
        if cfg!(windows) {
            let mut c = Command::new("cmd");
            c.args(["/C", "exit 0"]);
            c.stdout(Stdio::null()).stderr(Stdio::null());
            c
        } else {
            let mut c = Command::new("true");
            c.stdout(Stdio::null()).stderr(Stdio::null());
            c
        }
    }
    /// `format_elapsed` prints whole seconds and a single tenths digit.
    #[test]
    fn format_elapsed_examples() {
        assert_eq!(format_elapsed(Duration::from_millis(0)), "0.0s");
        assert_eq!(format_elapsed(Duration::from_millis(300)), "0.3s");
        assert_eq!(format_elapsed(Duration::from_millis(12_400)), "12.4s");
        assert_eq!(format_elapsed(Duration::from_secs(34)), "34.0s");
    }
    /// Checks the parse/filter steps that `resolve_timeout` applies.
    /// NOTE(review): this does not call `resolve_timeout` itself or touch the
    /// environment; it only mirrors the parsing logic.
    #[test]
    fn resolve_timeout_uses_default_when_unset() {
        let parsed = "0".parse::<u64>().ok().filter(|s| *s > 0);
        assert!(parsed.is_none(), "0 should be filtered out as invalid");
        let parsed = "abc".parse::<u64>().ok();
        assert!(parsed.is_none());
    }
    /// A child that exits within the budget yields its exit code.
    #[test]
    fn run_returns_exit_code_when_child_exits_normally() {
        let mut runner = StageRunner::new(Duration::from_secs(5));
        let outcome = runner.run("quick", &mut quick_exit_cmd());
        assert_eq!(outcome, StageOutcome::Exited(0));
    }
    /// A child that outlives the 200 ms budget yields `GlobalTimeout`, and the
    /// stage name is still recorded as the last stage.
    #[test]
    fn run_times_out_and_kills_child_when_deadline_elapses() {
        let mut runner = StageRunner::new(Duration::from_millis(200));
        let outcome = runner.run("hang", &mut sleep_forever_cmd());
        assert_eq!(outcome, StageOutcome::GlobalTimeout);
        assert_eq!(runner.last_stage(), Some("hang"));
    }
    /// With a zero budget `run` reports `GlobalTimeout` straight away.
    #[test]
    fn run_skips_when_already_over_budget() {
        let mut runner = StageRunner::new(Duration::from_millis(0));
        let outcome = runner.run("noop", &mut quick_exit_cmd());
        assert_eq!(outcome, StageOutcome::GlobalTimeout);
    }
    /// Each `start_stage` call and the final `finish` emit exactly one line, each
    /// prefixed with a bracketed elapsed time.
    #[test]
    fn progress_markers_capture_each_stage() {
        let mut runner =
            StageRunner::with_progress(Duration::from_secs(5), CapturingProgress::default());
        runner.start_stage("fmt-check");
        runner.start_stage("clippy");
        runner.start_stage("test");
        runner.finish();
        let progress = runner.progress_ref();
        assert_eq!(progress.lines.len(), 4);
        assert!(progress.lines[0].ends_with("-> fmt-check"));
        assert!(progress.lines[1].ends_with("-> clippy"));
        assert!(progress.lines[2].ends_with("-> test"));
        assert!(progress.lines[3].ends_with("done"));
        for line in &progress.lines {
            assert!(
                line.starts_with('[') && line.contains("s]"),
                "expected elapsed prefix in {line:?}"
            );
        }
    }
    /// Smoke test: `reap_orphan_daemons` runs to completion and returns a vector.
    #[test]
    fn reap_orphan_daemons_returns_a_vec_without_panic() {
        let killed = reap_orphan_daemons();
        let _ = killed.len();
    }
    /// The progress marker for a stage is emitted even when that stage times out.
    #[test]
    fn run_emits_progress_marker_for_hung_stage() {
        let mut runner =
            StageRunner::with_progress(Duration::from_millis(200), CapturingProgress::default());
        let outcome = runner.run("hang", &mut sleep_forever_cmd());
        assert_eq!(outcome, StageOutcome::GlobalTimeout);
        let progress = runner.progress_ref();
        assert!(
            progress.lines.iter().any(|l| l.contains("-> hang")),
            "expected progress marker, got {:?}",
            progress.lines
        );
    }
    /// After `kill_pids_and_wait` returns, the spawned child is no longer alive.
    #[test]
    fn kill_pids_and_wait_returns_only_after_child_is_dead() {
        let child = sleep_forever_cmd().spawn().expect("failed to spawn child");
        let pid = child.id();
        assert!(
            crate::ipc::is_process_alive(pid),
            "spawned child PID {pid} should be alive"
        );
        // Wait on the child from another thread — presumably so the killed
        // process is reaped rather than lingering as a zombie that still looks
        // alive; verify against `is_process_alive`.
        let waiter = std::thread::spawn(move || {
            let mut child = child;
            let _ = child.wait();
        });
        kill_pids_and_wait(&[pid], Duration::from_secs(5));
        waiter.join().expect("reaper thread panicked");
        assert!(
            !crate::ipc::is_process_alive(pid),
            "PID {pid} still alive after kill_pids_and_wait returned"
        );
    }
    /// An empty PID list returns promptly instead of waiting out the timeout.
    #[test]
    fn kill_pids_and_wait_is_a_noop_for_empty_input() {
        let start = Instant::now();
        kill_pids_and_wait(&[], Duration::from_secs(60));
        assert!(
            start.elapsed() < Duration::from_millis(500),
            "empty-input kill should return promptly, took {:?}",
            start.elapsed()
        );
    }
}