Skip to main content

open_loops/
scanner.rs

1//! Repository and unmerged-branch discovery via git shell-out.
2//! Design decision: shell-out (not git2/gix) — simple and debuggable;
3//! the product performance bottleneck is the LLM, not git.
4use anyhow::{bail, Context, Result};
5use chrono::{DateTime, Utc};
6use std::path::{Path, PathBuf};
7use std::process::Command;
8
9/// Runs a git subcommand in `repo` and returns trimmed stdout.
10///
11/// # Errors
12///
13/// Returns `Err` if git is not in PATH or if the command fails.
14pub(crate) fn git(repo: &Path, args: &[&str]) -> Result<String> {
15    let out = Command::new("git")
16        .arg("-C")
17        .arg(repo)
18        .args(args)
19        .output()
20        .context("git not found in PATH — install git")?;
21    if !out.status.success() {
22        bail!(
23            "git {:?} failed in {}: {}",
24            args,
25            repo.display(),
26            String::from_utf8_lossy(&out.stderr).trim()
27        );
28    }
29    Ok(String::from_utf8_lossy(&out.stdout).trim().to_string())
30}
31
32/// Default branch: origin/HEAD if it exists; otherwise main; otherwise master.
33///
34/// # Errors
35///
36/// Returns `Err` if no default branch is found.
37pub fn default_branch(repo: &Path) -> Result<String> {
38    if let Ok(sym) = git(
39        repo,
40        &["symbolic-ref", "--short", "refs/remotes/origin/HEAD"],
41    ) {
42        if let Some(branch) = sym.strip_prefix("origin/") {
43            return Ok(branch.to_string());
44        }
45    }
46    for candidate in ["main", "master"] {
47        if git(
48            repo,
49            &["rev-parse", "--verify", &format!("refs/heads/{candidate}")],
50        )
51        .is_ok()
52        {
53            return Ok(candidate.to_string());
54        }
55    }
56    bail!(
57        "couldn't find the default branch in {} (expected origin/HEAD, main or master)",
58        repo.display()
59    )
60}
61
62/// An open loop: an unmerged branch with its own commits.
63#[derive(Debug, Clone)]
64pub struct OpenLoop {
65    pub root_label: String,
66    pub repo_name: String,
67    pub repo_path: PathBuf,
68    pub branch: String,
69    pub head_sha: String,
70    pub last_commit: DateTime<Utc>,
71    pub ahead: u32,
72    pub behind: u32,
73}
74
75impl OpenLoop {
76    /// Canonical key used in resume/ignore: "root-label/repo/branch".
77    pub fn key(&self) -> String {
78        format!("{}/{}/{}", self.root_label, self.repo_name, self.branch)
79    }
80}
81
/// Directory names that `walk` never descends into while searching for repos.
const SKIP_DIRS: [&str; 2] = ["node_modules", "target"];
83
/// Heuristic for a bare repository: a `HEAD` file next to `objects/` and
/// `refs/` directories, all directly inside `dir`.
fn looks_like_bare(dir: &Path) -> bool {
    let has_head_file = dir.join("HEAD").is_file();
    has_head_file
        && ["objects", "refs"]
            .iter()
            .all(|sub| dir.join(sub).is_dir())
}
87
88fn is_repo_candidate(dir: &Path) -> bool {
89    dir.join(".git").exists() || looks_like_bare(dir)
90}
91
/// Turns an absolute git common-dir into a stable repo name (§5 of Spec Fase A).
///
/// * `<name>/.git` or `<name>/.bare` yields `<name>` (the containing directory);
///   if there is no containing directory name, the component itself is kept.
/// * `<name>.git` yields `<name>`.
/// * Anything else yields the last path component unchanged (empty when the
///   path has none).
pub fn repo_name_from_common_dir(common_dir: &Path) -> String {
    let leaf = match common_dir.file_name() {
        Some(name) => name.to_string_lossy().into_owned(),
        None => String::new(),
    };

    // Store directories take their name from the directory that contains them.
    if matches!(leaf.as_str(), ".git" | ".bare") {
        if let Some(container) = common_dir.parent().and_then(Path::file_name) {
            return container.to_string_lossy().into_owned();
        }
        return leaf;
    }

    match leaf.strip_suffix(".git") {
        Some(stem) => stem.to_owned(),
        None => leaf,
    }
}
107
108/// Absolute path of the git common-dir for `path` (bare store / `.git` dir).
109///
110/// # Errors
111///
112/// Returns `Err` when `path` is not inside a git repository.
113pub fn git_common_dir(path: &Path) -> Result<PathBuf> {
114    let raw = git(
115        path,
116        &["rev-parse", "--path-format=absolute", "--git-common-dir"],
117    )?;
118    Ok(PathBuf::from(raw))
119}
120
121/// Walks roots up to `scan_depth` looking for git repo candidates, then
122/// deduplicates by absolute `--git-common-dir`.
123pub fn find_repos(roots: &[PathBuf], scan_depth: usize) -> (Vec<PathBuf>, Vec<String>) {
124    let mut candidates = Vec::new();
125    for root in roots {
126        walk(root, 0, scan_depth, &mut candidates);
127    }
128    dedup_candidates(candidates)
129}
130
131fn dedup_candidates(candidates: Vec<PathBuf>) -> (Vec<PathBuf>, Vec<String>) {
132    use std::collections::HashMap;
133    let mut by_common: HashMap<PathBuf, PathBuf> = HashMap::new();
134    let mut warnings = Vec::new();
135    for candidate in candidates {
136        match git_common_dir(&candidate) {
137            Ok(common) => {
138                by_common.entry(common).or_insert(candidate);
139            }
140            Err(e) => {
141                warnings.push(format!("{}: {e:#}", candidate.display()));
142            }
143        }
144    }
145    let mut repos: Vec<PathBuf> = by_common.into_values().collect();
146    repos.sort();
147    (repos, warnings)
148}
149
150fn walk(dir: &Path, depth: usize, scan_depth: usize, candidates: &mut Vec<PathBuf>) {
151    if is_repo_candidate(dir) {
152        candidates.push(dir.to_path_buf());
153        return;
154    }
155    if depth >= scan_depth {
156        return;
157    }
158    let Ok(entries) = std::fs::read_dir(dir) else {
159        return;
160    };
161    for entry in entries.flatten() {
162        let path = entry.path();
163        let name = entry.file_name();
164        let name = name.to_string_lossy();
165        if !path.is_dir() || name.starts_with('.') || SKIP_DIRS.contains(&name.as_ref()) {
166            continue;
167        }
168        walk(&path, depth + 1, scan_depth, candidates);
169    }
170}
171
172/// Returns all unmerged branches (except default) in a repo.
173///
174/// # Errors
175///
176/// Returns `Err` if git fails or if the default branch is not found.
177pub fn open_loops(repo: &Path, root_label: &str) -> Result<Vec<OpenLoop>> {
178    let default = default_branch(repo)?;
179    let common_dir = git_common_dir(repo)?;
180    let repo_name = repo_name_from_common_dir(&common_dir);
181    let merged: std::collections::HashSet<String> = git(
182        repo,
183        &["branch", "--merged", &default, "--format=%(refname:short)"],
184    )?
185    .lines()
186    .map(|s| s.trim().to_string())
187    .collect();
188    let raw = git(
189        repo,
190        &[
191            "for-each-ref",
192            "refs/heads",
193            "--format=%(refname:short)%09%(objectname)%09%(committerdate:iso8601-strict)",
194        ],
195    )?;
196    let mut result = Vec::new();
197    for line in raw.lines() {
198        let mut parts = line.split('\t');
199        let (Some(branch), Some(sha), Some(date)) = (parts.next(), parts.next(), parts.next())
200        else {
201            eprintln!("warning: unexpected line from git for-each-ref ignored: {line:?}");
202            continue;
203        };
204        if branch == default || merged.contains(branch) {
205            continue;
206        }
207        let counts = git(
208            repo,
209            &[
210                "rev-list",
211                "--left-right",
212                "--count",
213                &format!("{default}...{branch}"),
214            ],
215        )?;
216        let mut c = counts.split_whitespace();
217        let behind: u32 = c.next().unwrap_or("0").parse().unwrap_or(0);
218        let ahead: u32 = c.next().unwrap_or("0").parse().unwrap_or(0);
219        let last_commit = DateTime::parse_from_rfc3339(date)
220            .with_context(|| format!("invalid date from git: {date}"))?
221            .with_timezone(&Utc);
222        result.push(OpenLoop {
223            root_label: root_label.to_string(),
224            repo_name: repo_name.clone(),
225            repo_path: repo.to_path_buf(),
226            branch: branch.to_string(),
227            head_sha: sha.to_string(),
228            last_commit,
229            ahead,
230            behind,
231        });
232    }
233    Ok(result)
234}
235
236/// Scans all repos found under the roots in parallel.
237///
238/// Individual repo failures become warnings and never abort the scan.
239pub fn scan(
240    roots: &[PathBuf],
241    labels: &[(PathBuf, String)],
242    scan_depth: usize,
243) -> (Vec<OpenLoop>, Vec<String>) {
244    let (repos, mut warnings) = find_repos(roots, scan_depth);
245    let results: Vec<Result<Vec<OpenLoop>>> = std::thread::scope(|s| {
246        let handles: Vec<_> = repos
247            .iter()
248            .map(|repo| {
249                let label = crate::config::label_for_repo(labels, repo);
250                s.spawn(move || open_loops(repo, &label))
251            })
252            .collect();
253        handles
254            .into_iter()
255            .map(|h| {
256                h.join()
257                    .unwrap_or_else(|_| Err(anyhow::anyhow!("panic while scanning repository")))
258            })
259            .collect()
260    });
261    let mut all = Vec::new();
262    for (repo, res) in repos.iter().zip(results) {
263        match res {
264            Ok(mut loops) => all.append(&mut loops),
265            Err(e) => warnings.push(format!("{}: {e:#}", repo.display())),
266        }
267    }
268    (all, warnings)
269}
270
271/// Branch-exclusive commits relative to the default (for the distillation prompt).
272///
273/// # Errors
274///
275/// Returns `Err` if git fails.
276pub fn git_log(repo: &Path, default: &str, branch: &str) -> Result<String> {
277    git(repo, &["log", "--oneline", &format!("{default}..{branch}")])
278}
279
280/// Diffstat of the branch against the base (for the distillation prompt).
281///
282/// # Errors
283///
284/// Returns `Err` if git fails.
285pub fn diffstat(repo: &Path, default: &str, branch: &str) -> Result<String> {
286    git(repo, &["diff", "--stat", &format!("{default}...{branch}")])
287}
288
289/// Time window of the branch-exclusive commits.
290///
291/// Used to filter out AI sessions that predate the branch work.
292///
293/// # Errors
294///
295/// Returns `Err` if git fails or if there are no commits on the branch.
296pub fn commit_window(
297    repo: &Path,
298    default: &str,
299    branch: &str,
300) -> Result<(DateTime<Utc>, DateTime<Utc>)> {
301    let raw = git(
302        repo,
303        &["log", "--format=%cI", &format!("{default}..{branch}")],
304    )?;
305    let mut dates: Vec<DateTime<Utc>> = raw
306        .lines()
307        .filter_map(|l| DateTime::parse_from_rfc3339(l.trim()).ok())
308        .map(|d| d.with_timezone(&Utc))
309        .collect();
310    if dates.is_empty() {
311        // branch has no exclusive commit: fall back to its latest commit
312        let head = git(repo, &["log", "-1", "--format=%cI", branch])?;
313        dates.push(DateTime::parse_from_rfc3339(head.trim())?.with_timezone(&Utc));
314    }
315    let min = dates
316        .iter()
317        .min()
318        .copied()
319        .ok_or_else(|| anyhow::anyhow!("no commit dates for {branch}"))?;
320    let max = dates
321        .iter()
322        .max()
323        .copied()
324        .ok_or_else(|| anyhow::anyhow!("no commit dates for {branch}"))?;
325    Ok((min, max))
326}
327
#[cfg(test)]
mod tests {
    use super::*;
    use crate::testutil;

    /// Final path component of each repo path, as owned strings.
    fn leaf_names(repos: &[std::path::PathBuf]) -> Vec<String> {
        repos
            .iter()
            .filter_map(|repo| repo.file_name())
            .map(|leaf| leaf.to_string_lossy().into_owned())
            .collect()
    }

    /// Whether `names` holds an entry equal to `wanted`.
    fn has_name(names: &[String], wanted: &str) -> bool {
        names.iter().any(|name| name == wanted)
    }

    #[test]
    fn default_branch_detects_main() {
        let sandbox = tempfile::tempdir().unwrap();
        let repo = sandbox.path().join("app");
        testutil::init_repo(&repo);

        let detected = default_branch(&repo).unwrap();
        assert_eq!(detected, "main");
    }

    #[test]
    fn git_fails_with_contextual_message() {
        // A fresh temp directory is not a git repo, so `status` must fail.
        let sandbox = tempfile::tempdir().unwrap();
        let failure = git(sandbox.path(), &["status"]).unwrap_err();

        let rendered = failure.to_string();
        let dir_text = sandbox.path().display().to_string();
        assert!(rendered.contains(&dir_text));
    }

    #[test]
    fn find_repos_dedups_container_and_worktrees() {
        let sandbox = tempfile::tempdir().unwrap();
        let container = sandbox.path().join("my-app");
        testutil::init_bare_worktree_container(&container);
        let worktree = container.join("dev");
        testutil::add_named_worktree(&container, "dev", "dev");

        // Container and worktree are both roots but share one common-dir.
        let roots = [container.clone(), worktree];
        let (repos, warnings) = find_repos(&roots, 4);

        assert!(warnings.is_empty(), "unexpected warnings: {warnings:?}");
        assert_eq!(repos.len(), 1);
        assert_eq!(repos[0], container);
    }

    #[test]
    fn find_repos_respects_scan_depth_and_skips_hidden() {
        let sandbox = tempfile::tempdir().unwrap();
        for rel in [
            "a/b/c/repo-deep",
            "a/b/repo-mid",
            "repo-shallow",
            ".hidden/repo3",
        ] {
            testutil::init_repo(&sandbox.path().join(rel));
        }
        let roots = [sandbox.path().to_path_buf()];

        let (deep_scan, _) = find_repos(&roots, 4);
        let deep_names = leaf_names(&deep_scan);
        assert!(has_name(&deep_names, "repo-deep"));
        assert!(has_name(&deep_names, "repo-mid"));
        assert!(has_name(&deep_names, "repo-shallow"));
        assert!(!has_name(&deep_names, "repo3"));

        let (shallow_scan, _) = find_repos(&roots, 2);
        let shallow_names = leaf_names(&shallow_scan);
        assert!(!has_name(&shallow_names, "repo-deep"));
        assert!(has_name(&shallow_names, "repo-shallow"));
    }

    #[test]
    fn find_repos_finds_normal_git_dir_repo() {
        let sandbox = tempfile::tempdir().unwrap();
        testutil::init_repo(&sandbox.path().join("app"));

        let roots = [sandbox.path().to_path_buf()];
        let (repos, _) = find_repos(&roots, 4);
        assert_eq!(repos.len(), 1);
    }

    #[test]
    fn find_repos_finds_bare_worktree_container_via_git_file() {
        let sandbox = tempfile::tempdir().unwrap();
        let container = sandbox.path().join("my-app");
        testutil::init_bare_worktree_container(&container);

        let roots = [sandbox.path().to_path_buf()];
        let (repos, _) = find_repos(&roots, 4);
        assert_eq!(repos.len(), 1);
        assert_eq!(repos[0], container);
    }

    #[test]
    fn find_repos_finds_pure_bare_repo() {
        let sandbox = tempfile::tempdir().unwrap();
        let bare = sandbox.path().join("foo.git");
        testutil::init_bare_repo(&bare);
        testutil::seed_bare_main(&bare);

        let roots = [sandbox.path().to_path_buf()];
        let (repos, _) = find_repos(&roots, 4);
        assert_eq!(repos.len(), 1);
        assert_eq!(repos[0], bare);
    }

    #[test]
    fn open_loops_uses_common_dir_repo_name_in_bare_layout() {
        let sandbox = tempfile::tempdir().unwrap();
        let container = sandbox.path().join("my-app");
        testutil::init_bare_worktree_container(&container);
        testutil::add_named_worktree(&container, "dev", "dev");
        testutil::add_branch_on_bare(&container.join(".bare"), "feat/x", "x.txt");

        let found = open_loops(&container, "root").unwrap();
        assert_eq!(found.len(), 1);
        let only = &found[0];
        assert_eq!(only.repo_name, "my-app");
        assert_eq!(only.branch, "feat/x");
        assert_eq!(only.key(), "root/my-app/feat/x");
    }

    #[test]
    fn open_loops_bare_root_repo_name_strips_dot_git_suffix() {
        let sandbox = tempfile::tempdir().unwrap();
        let bare = sandbox.path().join("foo.git");
        testutil::init_bare_repo(&bare);
        testutil::seed_bare_main(&bare);
        testutil::add_branch_on_bare(&bare, "feat/y", "y.txt");

        let found = open_loops(&bare, "r").unwrap();
        assert_eq!(found[0].repo_name, "foo");
    }

    #[test]
    fn open_loops_finds_unmerged_ignores_merged_and_default() {
        let sandbox = tempfile::tempdir().unwrap();
        let repo = sandbox.path().join("app");
        testutil::init_repo(&repo);
        testutil::add_branch_with_commit(&repo, "feat/x", "x.txt");
        // A branch created at main's tip counts as merged.
        testutil::git(&repo, &["branch", "merged"]);

        let found = open_loops(&repo, "root").unwrap();
        assert_eq!(found.len(), 1);
        let only = &found[0];
        assert_eq!(only.branch, "feat/x");
        assert_eq!(only.repo_name, "app");
        assert_eq!(only.root_label, "root");
        assert_eq!(only.key(), "root/app/feat/x");
        assert_eq!(only.ahead, 1);
        assert_eq!(only.behind, 0);
        assert_eq!(only.head_sha.len(), 40);
    }

    #[test]
    fn scan_aggregates_repos_and_reports_warning_without_aborting() {
        let sandbox = tempfile::tempdir().unwrap();
        let healthy = sandbox.path().join("good");
        testutil::init_repo(&healthy);
        testutil::add_branch_with_commit(&healthy, "feat/ok", "ok.txt");
        // Truly broken repo: initialised but without commits, so
        // default_branch fails for it.
        let commitless = sandbox.path().join("empty");
        std::fs::create_dir_all(&commitless).unwrap();
        testutil::git(&commitless, &["init", "-b", "main"]);

        let roots = [sandbox.path().to_path_buf()];
        let labels = vec![(sandbox.path().to_path_buf(), "r".to_string())];
        let (found, warnings) = scan(&roots, &labels, 4);

        assert_eq!(found.len(), 1);
        assert_eq!(found[0].key(), "r/good/feat/ok");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].contains("empty"));
    }

    #[test]
    fn context_helpers_return_commits_and_window() {
        let sandbox = tempfile::tempdir().unwrap();
        let repo = sandbox.path().join("app");
        testutil::init_repo(&repo);
        testutil::add_branch_with_commit(&repo, "feat/x", "x.txt");

        let history = git_log(&repo, "main", "feat/x").unwrap();
        assert!(history.contains("wip feat/x"));

        let summary = diffstat(&repo, "main", "feat/x").unwrap();
        assert!(summary.contains("x.txt"));

        let (earliest, latest) = commit_window(&repo, "main", "feat/x").unwrap();
        assert!(earliest <= latest);
    }

    #[test]
    fn default_branch_detects_master_fallback() {
        let sandbox = tempfile::tempdir().unwrap();
        let repo = sandbox.path();
        testutil::git(repo, &["init", "-b", "master"]);
        std::fs::write(repo.join("a.txt"), "a").unwrap();
        testutil::git(repo, &["add", "."]);
        testutil::git(repo, &["commit", "-m", "init"]);

        let detected = default_branch(repo).unwrap();
        assert_eq!(detected, "master");
    }

    #[test]
    fn default_branch_errors_without_main_or_master() {
        let sandbox = tempfile::tempdir().unwrap();
        let repo = sandbox.path();
        testutil::git(repo, &["init", "-b", "trunk"]);

        // No commits: refs/heads/main and refs/heads/master do not exist.
        let failure = default_branch(repo).unwrap_err();
        let rendered = failure.to_string();
        assert!(rendered.contains("couldn't find the default branch"));
    }

    #[test]
    fn git_common_dir_resolves_normal_and_bare_pointer() {
        let sandbox = tempfile::tempdir().unwrap();

        let plain = sandbox.path().join("app");
        testutil::init_repo(&plain);
        let plain_common = git_common_dir(&plain).unwrap();
        assert!(plain_common.ends_with(".git"));

        let container = sandbox.path().join("container");
        testutil::init_bare_worktree_container(&container);
        let container_common = git_common_dir(&container).unwrap();
        assert!(container_common.ends_with(".bare"));
    }

    #[test]
    fn repo_name_from_common_dir_table() {
        use std::path::Path;

        // (common-dir, expected repo name)
        let table = [
            ("/home/u/my-app/.bare", "my-app"),
            ("/home/u/app/.git", "app"),
            ("/srv/git/foo.git", "foo"),
            ("/srv/git/myproject", "myproject"),
        ];
        for (common, want) in table {
            let got = repo_name_from_common_dir(Path::new(common));
            assert_eq!(got, want, "common_dir={common}");
        }
    }
}