Skip to main content

git_worktree_manager/operations/
busy.rs

1//! Busy detection: determine whether a worktree is currently in use.
2//!
3//! Two signals are combined:
4//!   1. Session lockfile (explicit — `gw shell`/`gw start` write one)
5//!   2. Process cwd scan (implicit — catches external `cd` + tool usage)
6//!
7//! The current process and its ancestor chain are excluded so that Claude
8//! Code or a parent shell invoking `gw delete` on its own worktree does
9//! not self-detect as busy.
10
11use std::collections::HashSet;
12use std::path::{Path, PathBuf};
13#[cfg(target_os = "macos")]
14use std::process::Command;
15use std::sync::OnceLock;
16
17use super::lockfile;
18
/// Signal source that flagged a process as busy.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BusySource {
    /// The process was named by the worktree's session lockfile.
    Lockfile,
    /// The process was found by the cwd scan with its working directory
    /// inside the worktree.
    ProcessScan,
}
25
/// Information about a single process holding a worktree busy.
#[derive(Debug, Clone)]
pub struct BusyInfo {
    /// PID of the process considered to be occupying the worktree.
    pub pid: u32,
    /// Command name: taken from the lockfile entry for lockfile sources, or
    /// from the process scan otherwise.
    pub cmd: String,
    /// For lockfile sources, this is the worktree path (the process's
    /// actual cwd is unknown). For process-scan sources, this is the
    /// process's canonicalized cwd.
    pub cwd: PathBuf,
    /// Which signal produced this entry.
    pub source: BusySource,
}
37
/// Cached self-process-tree for the lifetime of this `gw` invocation:
/// the current PID plus its ancestor PIDs (see `compute_self_tree`).
static SELF_TREE: OnceLock<HashSet<u32>> = OnceLock::new();

/// Cached sibling set — processes sharing `gw`'s process group ID, captured
/// once per invocation (see `compute_self_siblings`). This covers shell
/// pipeline co-members (e.g. when a user runs `gw list | head` the `head`
/// process is gw's sibling, not an ancestor). The set is left empty when
/// `gw` shares its parent's process group, because in that situation the
/// pgid no longer singles out pipeline members.
static SELF_SIBLINGS: OnceLock<HashSet<u32>> = OnceLock::new();

/// Cached raw cwd scan. On unix this is populated once per `gw` invocation
/// (lsof / /proc walk is expensive). Each entry: (pid, cmd, canon_cwd).
static CWD_SCAN_CACHE: OnceLock<Vec<(u32, String, PathBuf)>> = OnceLock::new();

/// Emits the "could not scan processes" warning at most once per process.
/// `gw` is short-lived so this is appropriate; a long-running daemon using
/// this module would need to rework this (currently not a use case).
static SCAN_WARNING: OnceLock<()> = OnceLock::new();
55
56fn compute_self_tree() -> HashSet<u32> {
57    let mut tree = HashSet::new();
58    tree.insert(std::process::id());
59
60    #[cfg(unix)]
61    {
62        let mut pid = unsafe { libc::getppid() } as u32;
63        for _ in 0..64 {
64            // PID 0 is a kernel/orphan marker, not a userland process — skip.
65            if pid == 0 {
66                break;
67            }
68            // PID 1 (init/launchd) IS our ancestor when gw was reparented, so
69            // exclude it from busy detection just like any other ancestor.
70            // Stop walking: init has no meaningful parent for our purposes.
71            if pid == 1 {
72                tree.insert(pid);
73                break;
74            }
75            tree.insert(pid);
76            match parent_of(pid) {
77                Some(ppid) if ppid != pid => pid = ppid,
78                _ => break,
79            }
80        }
81    }
82    tree
83}
84
/// Returns the current process + all ancestor PIDs (via getppid chain).
/// Memoized for the lifetime of the process — the ancestry does not change
/// during a single `gw` invocation.
///
/// The first call computes the set with `compute_self_tree`; later calls
/// return the same cached reference.
pub fn self_process_tree() -> &'static HashSet<u32> {
    SELF_TREE.get_or_init(compute_self_tree)
}
91
92/// Compute the set of processes sharing `gw`'s process group ID.
93///
94/// Shells set up pipelines (`gw list | head | awk`) by putting all members
95/// in a single process group that becomes the foreground job. Using pgid
96/// as the sibling criterion matches exactly those pipeline co-members and
97/// excludes them from busy detection — they inherited the shell's cwd but
98/// are transient artifacts of the current command, not real occupants.
99///
100/// This is deliberately narrower than "processes sharing our ppid": the
101/// broader criterion would also exclude legitimate busy processes that
102/// happen to be spawned by the same parent as `gw` (e.g. a test harness
103/// running both a long-lived worker and `gw` from the same Cargo runner).
104#[cfg(unix)]
105fn compute_self_siblings() -> HashSet<u32> {
106    let mut siblings = HashSet::new();
107    let our_pid = std::process::id();
108    let our_pgid = unsafe { libc::getpgrp() } as u32;
109    if our_pgid == 0 || our_pgid == 1 {
110        return siblings;
111    }
112    // Distinguish two scenarios with the same raw pgid test:
113    //   (a) gw is a member of a shell pipeline (`gw list | head`). The shell
114    //       placed the pipeline in its own process group, so our pgid differs
115    //       from our parent's pgid. Pipeline co-members share our pgid and
116    //       are safe to exclude.
117    //   (b) gw was spawned by a non-shell parent that did not call setpgid
118    //       (e.g. `cargo test` spawning both gw and a long-lived worker).
119    //       Our pgid equals our parent's pgid, which means "same pgid" also
120    //       matches unrelated siblings that legitimately occupy a worktree.
121    //       In this case we return an empty set and let the ancestor-only
122    //       filter handle things.
123    let parent_pid = unsafe { libc::getppid() } as u32;
124    if parent_pid == 0 {
125        return siblings;
126    }
127    let parent_pgid = pgid_of(parent_pid).unwrap_or(0);
128    if parent_pgid == our_pgid {
129        return siblings;
130    }
131    for (pid, _, _) in cwd_scan() {
132        if *pid == our_pid {
133            continue;
134        }
135        if let Some(pgid) = pgid_of(*pid) {
136            if pgid == our_pgid {
137                siblings.insert(*pid);
138            }
139        }
140    }
141    siblings
142}
143
144#[cfg(not(unix))]
145fn compute_self_siblings() -> HashSet<u32> {
146    HashSet::new()
147}
148
/// Looks up the process group ID of `pid` from `/proc/<pid>/stat`.
///
/// Returns `None` when the file cannot be read or does not have the
/// expected shape.
#[cfg(target_os = "linux")]
fn pgid_of(pid: u32) -> Option<u32> {
    let stat = std::fs::read_to_string(format!("/proc/{}/stat", pid)).ok()?;
    // Layout: "pid (comm) state ppid pgid ...". `comm` may itself contain
    // spaces or parentheses, so anchor on the *last* ')' and count from there.
    let (_, tail) = stat.rsplit_once(')')?;
    // Fields following ')': state (0), ppid (1), pgid (2).
    tail.split_whitespace().nth(2)?.parse().ok()
}

/// Looks up the process group ID of `pid` by running `ps -o pgid= -p <pid>`.
///
/// Returns `None` when `ps` cannot be run, exits unsuccessfully, or prints
/// something that is not a number.
#[cfg(target_os = "macos")]
fn pgid_of(pid: u32) -> Option<u32> {
    let pid_arg = pid.to_string();
    let ps = Command::new("ps")
        .args(["-o", "pgid=", "-p", pid_arg.as_str()])
        .output()
        .ok()?;
    ps.status
        .success()
        .then(|| String::from_utf8_lossy(&ps.stdout).trim().parse::<u32>().ok())
        .flatten()
}

/// Fallback for platforms without a supported lookup: always `None`.
// dead_code is allowed because nothing calls this on such platforms.
#[cfg(not(any(target_os = "linux", target_os = "macos")))]
#[allow(dead_code)]
fn pgid_of(_pid: u32) -> Option<u32> {
    None
}
178
/// Returns the memoized sibling set (see `compute_self_siblings`).
///
/// The first call computes the set; later calls return the same cached
/// reference.
pub fn self_siblings() -> &'static HashSet<u32> {
    SELF_SIBLINGS.get_or_init(compute_self_siblings)
}
183
/// Looks up the parent PID of `pid` from the `PPid:` line of
/// `/proc/<pid>/status`.
///
/// Returns `None` when the file cannot be read, has no `PPid:` line, or the
/// value on that line is not a number.
#[cfg(target_os = "linux")]
fn parent_of(pid: u32) -> Option<u32> {
    let status = std::fs::read_to_string(format!("/proc/{}/status", pid)).ok()?;
    // Only the first `PPid:` line is considered.
    let value = status
        .lines()
        .find_map(|line| line.strip_prefix("PPid:"))?;
    value.trim().parse().ok()
}

/// Looks up the parent PID of `pid` by running `ps -o ppid= -p <pid>`.
///
/// Returns `None` when `ps` cannot be run, exits unsuccessfully, or prints
/// something that is not a number.
#[cfg(target_os = "macos")]
fn parent_of(pid: u32) -> Option<u32> {
    let pid_arg = pid.to_string();
    let ps = Command::new("ps")
        .args(["-o", "ppid=", "-p", pid_arg.as_str()])
        .output()
        .ok()?;
    ps.status
        .success()
        .then(|| String::from_utf8_lossy(&ps.stdout).trim().parse::<u32>().ok())
        .flatten()
}

/// Fallback for platforms without a supported lookup: always `None`.
// dead_code is allowed because nothing calls this on such platforms.
#[cfg(not(any(target_os = "linux", target_os = "macos")))]
#[allow(dead_code)]
fn parent_of(_pid: u32) -> Option<u32> {
    None
}
212
213#[allow(dead_code)]
214fn warn_scan_failed(what: &str) {
215    if SCAN_WARNING.set(()).is_ok() {
216        eprintln!(
217            "{} could not scan processes: {}",
218            console::style("warning:").yellow(),
219            what
220        );
221    }
222}
223
/// Populate and return the cached cwd scan (all processes, not filtered).
///
/// The scan itself (`raw_cwd_scan`) runs on the first call only; the result
/// is a snapshot, so processes started afterwards are not reflected.
fn cwd_scan() -> &'static [(u32, String, PathBuf)] {
    CWD_SCAN_CACHE.get_or_init(raw_cwd_scan).as_slice()
}
228
229#[cfg(target_os = "linux")]
230fn raw_cwd_scan() -> Vec<(u32, String, PathBuf)> {
231    let mut out = Vec::new();
232    let proc_dir = match std::fs::read_dir("/proc") {
233        Ok(d) => d,
234        Err(e) => {
235            warn_scan_failed(&format!("/proc unreadable: {}", e));
236            return out;
237        }
238    };
239    for entry in proc_dir.flatten() {
240        let name = entry.file_name();
241        let name = name.to_string_lossy();
242        let pid: u32 = match name.parse() {
243            Ok(n) => n,
244            Err(_) => continue,
245        };
246        let cwd_link = entry.path().join("cwd");
247        let cwd = match std::fs::read_link(&cwd_link) {
248            Ok(p) => p,
249            Err(_) => continue,
250        };
251        // canonicalize so symlinked / bind-mounted cwds match the target.
252        // On Linux, readlink on /proc/<pid>/cwd returns " (deleted)" if the
253        // process's cwd was unlinked; canonicalize fails and we fall back.
254        let cwd_canon = cwd.canonicalize().unwrap_or(cwd.clone());
255        let cmd = std::fs::read_to_string(entry.path().join("comm"))
256            .map(|s| s.trim().to_string())
257            .unwrap_or_default();
258        out.push((pid, cmd, cwd_canon));
259    }
260    out
261}
262
/// Heuristic: does `cmd` look like an argv[0] that was overwritten with a
/// version or status string instead of a program name? Example from the
/// wild: Claude Code rewrites argv[0] to "2.1.104". `lsof` reports argv[0]
/// for macOS processes, so such values leak into busy reporting; callers
/// use this check to decide when to fall back to a `ps -o comm=` lookup.
///
/// Returns `true` for the empty string, and for strings that consist of an
/// optional leading `v` followed only by ASCII digits and dots, provided at
/// least one digit is present and the string does not begin with a dot.
///
/// Linux's `/proc/<pid>/comm` already reports the kernel-recorded name so
/// this heuristic is only used on macOS; the tests remain cross-platform.
#[cfg_attr(not(any(target_os = "macos", test)), allow(dead_code))]
fn is_suspicious_cmd(cmd: &str) -> bool {
    if cmd.is_empty() {
        return true;
    }
    // Part that must be made of digits and dots: everything after a leading
    // `v`, or the whole string when it opens with a digit. Any other first
    // character means this is an ordinary name.
    let numeric_part = match cmd.strip_prefix('v') {
        Some(rest) => rest,
        None if cmd.starts_with(|c: char| c.is_ascii_digit()) => cmd,
        None => return false,
    };
    // Non-ASCII characters encode to bytes >= 0x80, which are neither digits
    // nor '.', so a byte-wise check agrees with a char-wise one.
    let only_digits_and_dots = numeric_part
        .bytes()
        .all(|b| b.is_ascii_digit() || b == b'.');
    let has_digit = numeric_part.bytes().any(|b| b.is_ascii_digit());
    only_digits_and_dots && has_digit
}
294
/// Asks `ps -o comm= -p <pid>` for the process's executable and returns its
/// basename.
///
/// Returns `None` when `ps` cannot be run, exits unsuccessfully, or prints
/// nothing.
#[cfg(target_os = "macos")]
fn kernel_comm(pid: u32) -> Option<String> {
    let pid_arg = pid.to_string();
    let ps = Command::new("ps")
        .args(["-o", "comm=", "-p", pid_arg.as_str()])
        .output()
        .ok()?;
    if !ps.status.success() {
        return None;
    }
    let reported = String::from_utf8_lossy(&ps.stdout).trim().to_string();
    if reported.is_empty() {
        return None;
    }
    // `ps -o comm=` on macOS prints the full executable path; keep only the
    // final component, or the whole string if it has none.
    let basename = Path::new(&reported)
        .file_name()
        .map(|name| name.to_string_lossy().into_owned());
    Some(basename.unwrap_or(reported))
}
315
/// Runs `lsof` and returns `(pid, cmd, cwd)` for every cwd record it prints.
///
/// `cwd` is canonicalized when possible and otherwise kept as reported.
/// `cmd` is lsof's command field, unless `is_suspicious_cmd` flags it, in
/// which case `kernel_comm` is tried first. If `lsof` cannot be started, or
/// fails without producing any output, a warning is emitted via
/// `warn_scan_failed` and the result is empty.
#[cfg(target_os = "macos")]
fn raw_cwd_scan() -> Vec<(u32, String, PathBuf)> {
    // `lsof -a -d cwd -F pcn` prints records of the form:
    //   p<pid>\nc<cmd>\nn<path>\n
    // `+c 0` disables lsof's default 9-char COMMAND truncation so multi-word
    // names like "tmux: server" survive intact for the multiplexer filter.
    let lsof = match Command::new("lsof")
        .args(["-a", "-d", "cwd", "-F", "pcn", "+c", "0"])
        .output()
    {
        Ok(result) => result,
        Err(e) => {
            warn_scan_failed(&format!("lsof unavailable: {}", e));
            return Vec::new();
        }
    };
    // A failing exit status is tolerated as long as some output was produced.
    if lsof.stdout.is_empty() && !lsof.status.success() {
        warn_scan_failed("lsof returned no output");
        return Vec::new();
    }
    let text = String::from_utf8_lossy(&lsof.stdout);

    let mut records = Vec::new();
    let mut pid_field: Option<u32> = None;
    let mut cmd_field = String::new();
    for line in text.lines() {
        // The first character of each line identifies the field; the rest of
        // the line is its value.
        let mut value = line.chars();
        match value.next() {
            Some('p') => {
                // A new process record starts: forget the previous command.
                pid_field = value.as_str().parse().ok();
                cmd_field.clear();
            }
            Some('c') => cmd_field = value.as_str().to_string(),
            Some('n') => {
                // A path with no valid pid before it cannot be attributed.
                let Some(pid) = pid_field else {
                    continue;
                };
                let reported = PathBuf::from(value.as_str());
                let cwd = reported.canonicalize().unwrap_or(reported);
                let cmd = if is_suspicious_cmd(&cmd_field) {
                    kernel_comm(pid).unwrap_or_else(|| cmd_field.clone())
                } else {
                    cmd_field.clone()
                };
                records.push((pid, cmd, cwd));
            }
            _ => {}
        }
    }
    records
}
362
/// Fallback for platforms other than Linux and macOS: no scan is performed
/// and the result is always empty, so only the lockfile signal can report a
/// worktree as busy there.
#[cfg(not(any(target_os = "linux", target_os = "macos")))]
fn raw_cwd_scan() -> Vec<(u32, String, PathBuf)> {
    Vec::new()
}
367
368/// Detect busy processes for a given worktree path.
369///
370/// Combines the lockfile signal and a process cwd scan. Filters out the
371/// current process tree so `gw delete` invoked from within the worktree
372/// does not self-report as busy.
373///
374/// Note: `detect_busy` calls `lockfile::read_and_clean_stale`, which removes
375/// lockfiles belonging to dead owners as a self-healing side effect. This
376/// means even read-only operations like `gw list` may mutate
377/// `<worktree>/.git/gw-session.lock` when a stale file is encountered.
378pub fn detect_busy(worktree: &Path) -> Vec<BusyInfo> {
379    let exclude_tree = self_process_tree();
380    let exclude_siblings = self_siblings();
381    let is_excluded = |pid: u32| exclude_tree.contains(&pid) || exclude_siblings.contains(&pid);
382    let mut out = Vec::new();
383
384    // Invariant: lockfile entries are pushed before the cwd scan so the
385    // dedup check below keeps the lockfile's richer `cmd` (e.g. "claude").
386    // Edge case: if the lockfile PID is in self_tree/self_siblings it is
387    // skipped entirely, and other PIDs found by the cwd scan are reported
388    // with whatever name `/proc/*/comm` or `lsof` provided — not the
389    // lockfile's cmd.
390    if let Some(entry) = lockfile::read_and_clean_stale(worktree) {
391        if !is_excluded(entry.pid) {
392            out.push(BusyInfo {
393                pid: entry.pid,
394                cmd: entry.cmd,
395                cwd: worktree.to_path_buf(),
396                source: BusySource::Lockfile,
397            });
398        }
399    }
400
401    for info in scan_cwd(worktree) {
402        if is_excluded(info.pid) {
403            continue;
404        }
405        if out.iter().any(|b| b.pid == info.pid) {
406            continue;
407        }
408        out.push(info);
409    }
410
411    out
412}
413
414/// Fast busy detection using only the session lockfile.
415///
416/// Unlike [`detect_busy`], this does not perform a system-wide process cwd
417/// scan (lsof on macOS, /proc walk on Linux). The cwd scan takes ~1.5s on
418/// typical macOS systems and dominates `gw list` latency, so read-only
419/// display paths use this variant.
420///
421/// This trades coverage for speed: worktrees entered via external `cd`
422/// without a `gw shell`/`gw start` session will not be flagged as busy.
423/// Commands that need strong busy guarantees (`gw delete`, `gw clean`)
424/// continue to use [`detect_busy`].
425///
426/// Like [`detect_busy`], this calls [`lockfile::read_and_clean_stale`]
427/// and may silently remove a stale `<worktree>/.git/gw-session.lock` as
428/// a self-healing side effect. `gw list` (the primary caller) therefore
429/// mutates lockfiles on every invocation, even though it is nominally
430/// read-only.
431pub fn detect_busy_lockfile_only(worktree: &Path) -> Vec<BusyInfo> {
432    // Skip self_siblings: it internally triggers cwd_scan (lsof / /proc walk)
433    // which is exactly what this fast path exists to avoid. Pipeline co-members
434    // of this gw invocation are short-lived CLI tools (e.g. `gw list | head`)
435    // that never call `gw shell`/`gw start`, so they cannot own a lockfile.
436    // Ancestor-only exclusion is sufficient in practice — and in the rare case
437    // where a true sibling (e.g. a backgrounded `gw start`) does own a
438    // lockfile, reporting its worktree as busy is correct, not a false positive.
439    let exclude_tree = self_process_tree();
440    let is_excluded = |pid: u32| exclude_tree.contains(&pid);
441    let mut out = Vec::new();
442
443    if let Some(entry) = lockfile::read_and_clean_stale(worktree) {
444        if !is_excluded(entry.pid) {
445            out.push(BusyInfo {
446                pid: entry.pid,
447                cmd: entry.cmd,
448                cwd: worktree.to_path_buf(),
449                source: BusySource::Lockfile,
450            });
451        }
452    }
453
454    out
455}
456
/// Reports whether `cmd` names a terminal multiplexer.
///
/// A multiplexer's server process may have been launched from within a
/// worktree without meaningfully "occupying" it — the real work happens in
/// child shells / tools, which the cwd scan reports independently.
/// Reporting the multiplexer itself just produces noise when running
/// `gw delete` from a pane hosted by that multiplexer.
///
/// The comparison is exact and case-sensitive. Names are matched against
/// `/proc/<pid>/comm` on Linux (≤15 chars; may reflect `prctl(PR_SET_NAME)`
/// rather than argv[0], e.g. "tmux: server") or `lsof`'s COMMAND field on
/// macOS (we pass `+c 0` to disable its default 9-char truncation — see
/// `raw_cwd_scan`). GNU screen's detached server renames itself to uppercase
/// "SCREEN" via prctl, so both spellings are listed.
fn is_multiplexer(cmd: &str) -> bool {
    const MULTIPLEXERS: [&str; 7] = [
        "zellij",
        "tmux",
        "tmux: server",
        "tmate",
        "tmate: server",
        "screen",
        "SCREEN",
    ];
    MULTIPLEXERS.contains(&cmd)
}
474
475fn scan_cwd(worktree: &Path) -> Vec<BusyInfo> {
476    let canon_target = match worktree.canonicalize() {
477        Ok(p) => p,
478        Err(_) => return Vec::new(),
479    };
480    let mut out = Vec::new();
481    for (pid, cmd, cwd) in cwd_scan() {
482        // Both sides were canonicalized upstream (handles macOS /var vs
483        // /private/var skew). This starts_with is the containment check.
484        if cwd.starts_with(&canon_target) {
485            if is_multiplexer(cmd) {
486                continue;
487            }
488            out.push(BusyInfo {
489                pid: *pid,
490                cmd: cmd.clone(),
491                cwd: cwd.clone(),
492                source: BusySource::ProcessScan,
493            });
494        }
495    }
496    out
497}
498
// Unit tests for the pure helpers, plus smoke tests that touch the real
// process table.
#[cfg(test)]
mod tests {
    use super::*;

    // The empty string and version-like strings must trigger the fallback.
    #[test]
    fn is_suspicious_cmd_flags_version_strings() {
        assert!(is_suspicious_cmd(""));
        assert!(is_suspicious_cmd("2.1.104"));
        assert!(is_suspicious_cmd("0.0.1"));
        assert!(is_suspicious_cmd("v1.2.3"));
        assert!(is_suspicious_cmd("42"));
    }

    // Ordinary program names — including ones starting with `v` — must not
    // be flagged.
    #[test]
    fn is_suspicious_cmd_accepts_real_names() {
        assert!(!is_suspicious_cmd("claude"));
        assert!(!is_suspicious_cmd("node"));
        assert!(!is_suspicious_cmd("zsh"));
        assert!(!is_suspicious_cmd("tmux: server"));
        assert!(!is_suspicious_cmd("python3"));
        assert!(!is_suspicious_cmd("v"));
        assert!(!is_suspicious_cmd("vim"));
    }

    // Every name in the multiplexer list is recognized.
    #[test]
    fn is_multiplexer_matches_known_names() {
        for name in [
            "zellij",
            "tmux",
            "tmux: server",
            "tmate",
            "tmate: server",
            "screen",
            "SCREEN",
        ] {
            assert!(is_multiplexer(name), "expected match for {:?}", name);
        }
    }

    // Matching is exact: prefixes, suffixes and other casings do not count.
    #[test]
    fn is_multiplexer_rejects_non_multiplexers() {
        for name in [
            "",
            "zsh",
            "bash",
            "claude",
            "tmuxinator",
            "ztmux",
            "zellij-server",
            "Screen",
        ] {
            assert!(!is_multiplexer(name), "expected no match for {:?}", name);
        }
    }

    // The current process is always part of its own tree.
    #[test]
    fn self_tree_contains_current_pid() {
        let tree = self_process_tree();
        assert!(tree.contains(&std::process::id()));
    }

    // On unix the direct parent must be part of the tree as well.
    #[cfg(unix)]
    #[test]
    fn self_tree_contains_parent_pid() {
        let tree = self_process_tree();
        let ppid = unsafe { libc::getppid() } as u32;
        assert!(
            tree.contains(&ppid),
            "expected tree to contain ppid {}",
            ppid
        );
    }

    // End-to-end check of the raw scan: spawn a child whose cwd is a fresh
    // temp dir and expect the scan to report that child with that cwd.
    #[cfg(any(target_os = "linux", target_os = "macos"))]
    #[test]
    fn scan_cwd_finds_child_with_cwd_in_tempdir() {
        use std::process::{Command, Stdio};
        use std::thread::sleep;
        use std::time::{Duration, Instant};

        let dir = tempfile::TempDir::new().unwrap();
        let mut child = Command::new("sleep")
            .arg("30")
            .current_dir(dir.path())
            .stdout(Stdio::null())
            .stderr(Stdio::null())
            .spawn()
            .expect("spawn sleep");

        // Give the OS a beat to register the child's cwd so the first scan
        // usually succeeds; then fall back to polling for slow CI hosts.
        // raw_cwd_scan() bypasses the module-static cache (which may have
        // been populated before the child existed).
        sleep(Duration::from_millis(50));
        let canon = dir
            .path()
            .canonicalize()
            .unwrap_or(dir.path().to_path_buf());
        // True when the scan output contains our child with a cwd inside
        // the temp dir.
        let matches = |raw: &[(u32, String, std::path::PathBuf)]| -> bool {
            raw.iter()
                .any(|(p, _, cwd)| *p == child.id() && cwd.starts_with(&canon))
        };
        let mut found = matches(&raw_cwd_scan());
        if !found {
            // Retry for up to two seconds before giving up.
            let deadline = Instant::now() + Duration::from_secs(2);
            while Instant::now() < deadline {
                if matches(&raw_cwd_scan()) {
                    found = true;
                    break;
                }
                sleep(Duration::from_millis(50));
            }
        }

        // Reap the child before asserting so a failure does not leave the
        // `sleep` process behind.
        let _ = child.kill();
        let _ = child.wait();

        assert!(
            found,
            "expected to find child pid={} with cwd in {:?}",
            child.id(),
            dir.path()
        );
    }
}