Skip to main content

mollify_core/
git.rs

1//! Git integration for the PR gate. Computes changed files (working tree +
2//! staged + optionally vs a base ref) and **changed line ranges** so findings
3//! can be attributed introduced-vs-inherited at line granularity (parsed from
4//! `git diff --unified=0`), with file-level as the fallback.
5
6use camino::Utf8Path;
7use rustc_hash::FxHashSet;
8use std::process::Command;
9
10/// Return the set of changed file paths (relative to `root`), or `None` if this
11/// isn't a git repo / git is unavailable. Includes unstaged, staged, untracked,
12/// and (if `base` is given) everything changed since the merge-base with `base`.
13pub fn changed_files(root: &Utf8Path, base: Option<&str>) -> Option<FxHashSet<String>> {
14    // Quick check: is this a git work tree?
15    let ok = Command::new("git")
16        .arg("-C")
17        .arg(root.as_str())
18        .args(["rev-parse", "--is-inside-work-tree"])
19        .output()
20        .ok()?;
21    if !ok.status.success() {
22        return None;
23    }
24
25    let mut set = FxHashSet::default();
26    let mut add = |args: &[&str]| {
27        if let Ok(out) = Command::new("git")
28            .arg("-C")
29            .arg(root.as_str())
30            .args(args)
31            .output()
32        {
33            if out.status.success() {
34                for line in String::from_utf8_lossy(&out.stdout).lines() {
35                    let l = line.trim();
36                    if !l.is_empty() {
37                        set.insert(l.to_string());
38                    }
39                }
40            }
41        }
42    };
43
44    add(&["diff", "--name-only"]); // unstaged
45    add(&["diff", "--name-only", "--cached"]); // staged
46    add(&["ls-files", "--others", "--exclude-standard"]); // untracked
47    if let Some(base) = base {
48        let range = format!("{base}...HEAD");
49        add(&["diff", "--name-only", &range]);
50    }
51    Some(set)
52}
53
54/// Added/modified line ranges per file (relative paths) from `git diff
55/// --unified=0`, combining unstaged + staged + (if `base`) the base range, plus
56/// whole-file ranges for untracked files. `None` if not a git repo. Enables
57/// **line-level** introduced-vs-inherited attribution.
58pub fn changed_lines(
59    root: &Utf8Path,
60    base: Option<&str>,
61) -> Option<rustc_hash::FxHashMap<String, Vec<(u32, u32)>>> {
62    let ok = Command::new("git")
63        .arg("-C")
64        .arg(root.as_str())
65        .args(["rev-parse", "--is-inside-work-tree"])
66        .output()
67        .ok()?;
68    if !ok.status.success() {
69        return None;
70    }
71    let mut map: rustc_hash::FxHashMap<String, Vec<(u32, u32)>> = rustc_hash::FxHashMap::default();
72    let mut add_diff = |args: &[&str]| {
73        if let Ok(out) = Command::new("git")
74            .arg("-C")
75            .arg(root.as_str())
76            .args(args)
77            .output()
78        {
79            if out.status.success() {
80                parse_unified0(&String::from_utf8_lossy(&out.stdout), &mut map);
81            }
82        }
83    };
84    add_diff(&["diff", "--unified=0"]);
85    add_diff(&["diff", "--unified=0", "--cached"]);
86    if let Some(base) = base {
87        let range = format!("{base}...HEAD");
88        add_diff(&["diff", "--unified=0", &range]);
89    }
90    // Untracked files: the whole file is "introduced".
91    if let Ok(out) = Command::new("git")
92        .arg("-C")
93        .arg(root.as_str())
94        .args(["ls-files", "--others", "--exclude-standard"])
95        .output()
96    {
97        if out.status.success() {
98            for f in String::from_utf8_lossy(&out.stdout).lines() {
99                let f = f.trim();
100                if !f.is_empty() {
101                    map.entry(f.to_string()).or_default().push((1, u32::MAX));
102                }
103            }
104        }
105    }
106    Some(map)
107}
108
109/// Parse `git diff --unified=0` output, recording added-line ranges per `+++`
110/// file from each `@@ … +start[,len] @@` hunk header.
111fn parse_unified0(diff: &str, map: &mut rustc_hash::FxHashMap<String, Vec<(u32, u32)>>) {
112    let mut current: Option<String> = None;
113    for line in diff.lines() {
114        if let Some(rest) = line.strip_prefix("+++ ") {
115            // `+++ b/path` (or `+++ /dev/null` for deletions).
116            current = rest
117                .strip_prefix("b/")
118                .or(Some(rest))
119                .filter(|p| *p != "/dev/null")
120                .map(|p| p.to_string());
121        } else if line.starts_with("@@ ") {
122            // @@ -a,b +c,d @@
123            if let Some(plus) = line.split('+').nth(1) {
124                let spec = plus.split([' ', '@']).next().unwrap_or("");
125                let mut it = spec.split(',');
126                let start: u32 = it.next().and_then(|s| s.trim().parse().ok()).unwrap_or(0);
127                let len: u32 = it.next().and_then(|s| s.trim().parse().ok()).unwrap_or(1);
128                if start > 0 && len > 0 {
129                    if let Some(f) = &current {
130                        map.entry(f.clone())
131                            .or_default()
132                            .push((start, start + len - 1));
133                    }
134                }
135            }
136        }
137    }
138}
139
140/// Whether `line` of `finding_path` falls in a changed range from [`changed_lines`].
141pub fn line_is_changed(
142    root: &Utf8Path,
143    finding_path: &Utf8Path,
144    line: u32,
145    changed: &rustc_hash::FxHashMap<String, Vec<(u32, u32)>>,
146) -> Option<bool> {
147    let rel = finding_path
148        .strip_prefix(root)
149        .unwrap_or(finding_path)
150        .as_str()
151        .trim_start_matches("./");
152    let ranges = changed.get(rel).or_else(|| {
153        finding_path.file_name().and_then(|name| {
154            changed
155                .iter()
156                .find(|(k, _)| k.ends_with(name))
157                .map(|(_, v)| v)
158        })
159    })?;
160    Some(ranges.iter().any(|&(s, e)| line >= s && line <= e))
161}
162
163/// Per-file churn = number of commits that touched each file (relative paths).
164/// `None` if not a git repo. Used for churn×complexity hotspot ranking.
165pub fn file_churn(root: &Utf8Path) -> Option<rustc_hash::FxHashMap<String, u32>> {
166    let out = Command::new("git")
167        .arg("-C")
168        .arg(root.as_str())
169        .args(["log", "--no-merges", "--pretty=format:", "--name-only"])
170        .output()
171        .ok()?;
172    if !out.status.success() {
173        return None;
174    }
175    let mut counts: rustc_hash::FxHashMap<String, u32> = rustc_hash::FxHashMap::default();
176    for line in String::from_utf8_lossy(&out.stdout).lines() {
177        let l = line.trim();
178        if !l.is_empty() {
179            *counts.entry(l.to_string()).or_insert(0) += 1;
180        }
181    }
182    Some(counts)
183}
184
185/// Whether a finding path (possibly absolute or `./`-prefixed) is in the changed
186/// set (which holds paths relative to `root`).
187pub fn path_is_changed(
188    root: &Utf8Path,
189    finding_path: &Utf8Path,
190    changed: &FxHashSet<String>,
191) -> bool {
192    let rel = finding_path
193        .strip_prefix(root)
194        .unwrap_or(finding_path)
195        .as_str()
196        .trim_start_matches("./");
197    if changed.contains(rel) {
198        return true;
199    }
200    // Fallback: match by file name (handles path-normalization edge cases).
201    if let Some(name) = finding_path.file_name() {
202        return changed.iter().any(|c| c.ends_with(name));
203    }
204    false
205}
206
#[cfg(test)]
mod tests {
    use super::*;

    /// A two-hunk `--unified=0` diff for one file: a three-line insertion at
    /// new line 11 and a one-line replacement at new line 24.
    #[test]
    fn parses_unified0_hunks() {
        let diff = concat!(
            "diff --git a/app.py b/app.py\n",
            "--- a/app.py\n",
            "+++ b/app.py\n",
            "@@ -10,0 +11,3 @@ def f():\n",
            "+x = 1\n",
            "+y = 2\n",
            "+z = 3\n",
            "@@ -20 +24 @@\n",
            "-old\n",
            "+new\n",
        );

        let mut map = rustc_hash::FxHashMap::default();
        parse_unified0(diff, &mut map);

        let ranges = map.get("app.py").unwrap();
        // Insertion covers 11..=13; the omitted length on `+24` means one line.
        for expected in [(11, 13), (24, 24)] {
            assert!(ranges.contains(&expected), "got {ranges:?}");
        }
    }
}
231}