Skip to main content

harn_hostlib/scanner/
git.rs

//! Git-backed scanner inputs.
//!
//! The scanner core consumes Git through this capability boundary so tests
//! can exercise tracked-file and churn behavior without depending on the
//! ambient checkout, Git hooks, fsmonitor, or hook-time environment state.
6
7use std::collections::BTreeMap;
8use std::path::Path;
9use std::process::Command;
10
/// Git data needed by the scanner.
///
/// The scanner reaches Git only through this trait, so an alternative
/// implementation can be substituted (for example in tests) without touching
/// a real checkout.
pub trait GitCapabilities {
    /// Return tracked and untracked file paths relative to `root`.
    ///
    /// The result is optional: implementations return `None` when they cannot
    /// provide a listing for `root`.
    fn list_files(&self, root: &Path) -> Option<Vec<String>>;

    /// Return normalized 0..1 churn scores keyed by paths relative to `root`.
    ///
    /// The map may be empty when an implementation has no churn data for
    /// `root`.
    fn churn_scores(&self, root: &Path) -> BTreeMap<String, f64>;
}
19
/// Production [`GitCapabilities`] implementation backed by the `git` CLI.
///
/// The type carries no state; it is a unit struct that can be created with
/// `CliGitCapabilities` or `CliGitCapabilities::default()`.
#[derive(Debug, Default)]
pub struct CliGitCapabilities;
23
24impl GitCapabilities for CliGitCapabilities {
25    fn list_files(&self, root: &Path) -> Option<Vec<String>> {
26        if !has_git_repository_marker(root) {
27            return None;
28        }
29
30        let mut cmd = Command::new("git");
31        super::strip_ambient_git_env(&mut cmd);
32        let output = cmd
33            .args([
34                "-C",
35                root.to_str()?,
36                "ls-files",
37                "--cached",
38                "--others",
39                "--exclude-standard",
40            ])
41            .output()
42            .ok()?;
43        if !output.status.success() {
44            return None;
45        }
46        let stdout = String::from_utf8(output.stdout).ok()?;
47        let entries: Vec<String> = stdout
48            .lines()
49            .filter(|line| !line.is_empty())
50            .map(str::to_string)
51            .collect();
52        if entries.is_empty() {
53            None
54        } else {
55            Some(entries)
56        }
57    }
58
59    fn churn_scores(&self, root: &Path) -> BTreeMap<String, f64> {
60        if !has_git_repository_marker(root) {
61            return BTreeMap::new();
62        }
63
64        let mut cmd = Command::new("git");
65        super::strip_ambient_git_env(&mut cmd);
66        let output = cmd
67            .args([
68                "-C",
69                match root.to_str() {
70                    Some(s) => s,
71                    None => return BTreeMap::new(),
72                },
73                "log",
74                "--since=90.days",
75                "--name-only",
76                "--pretty=format:",
77            ])
78            .output();
79        let output = match output {
80            Ok(o) if o.status.success() => o,
81            _ => return BTreeMap::new(),
82        };
83        let stdout = match String::from_utf8(output.stdout) {
84            Ok(s) => s,
85            Err(_) => return BTreeMap::new(),
86        };
87
88        let mut counts: BTreeMap<String, usize> = BTreeMap::new();
89        for line in stdout.lines() {
90            let trimmed = line.trim();
91            if trimmed.is_empty() {
92                continue;
93            }
94            *counts.entry(trimmed.to_string()).or_insert(0) += 1;
95        }
96
97        let max = counts.values().copied().max().unwrap_or(1).max(1) as f64;
98        counts
99            .into_iter()
100            .map(|(file, count)| (file, count as f64 / max))
101            .collect()
102    }
103}
104
/// Reports whether `root` or any of its ancestor directories contains a
/// `.git` entry (either a directory or a file).
///
/// Only the filesystem is consulted; no `git` process is started. The default
/// capability relies on this check to decide whether invoking Git makes sense
/// at all.
fn has_git_repository_marker(root: &Path) -> bool {
    let mut current = Some(root);
    while let Some(dir) = current {
        if dir.join(".git").exists() {
            return true;
        }
        // Step one level up; `parent()` yields `None` past the last component.
        current = dir.parent();
    }
    false
}
112
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Both marker shapes must be recognised: a `.git` file (as used by linked
    /// worktrees, containing a `gitdir:` pointer) and a plain `.git` directory.
    #[test]
    fn marker_detection_handles_plain_and_worktree_git_markers() {
        // Worktree-style marker: `.git` is a regular file.
        let worktree = tempdir().unwrap();
        let root = worktree.path();

        assert!(!has_git_repository_marker(root));

        fs::write(root.join(".git"), "gitdir: /tmp/example\n").unwrap();
        assert!(has_git_repository_marker(root));
        // Paths below the marker are detected through the ancestor walk, even
        // when they do not exist on disk.
        assert!(has_git_repository_marker(&root.join("nested")));

        // Plain repository marker: `.git` is a directory.
        let plain = tempdir().unwrap();
        let root = plain.path();

        assert!(!has_git_repository_marker(root));

        fs::create_dir(root.join(".git")).unwrap();
        assert!(has_git_repository_marker(root));
        assert!(has_git_repository_marker(&root.join("nested")));
    }
}